diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index bc33da60deb67ed65bbdd9c3b06c8cbc368a3381..967f22975978c177aa53e5918cf27a3a7e49869d 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -37,6 +37,10 @@ enable_language(CXX)
 #####################################################################
 include(CheckCCompilerFlag)
 
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")  # pass the variable name: if() dereferences it, and an empty ID cannot break the condition
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")  # Intel-only flag appended to the global C++ flags
+endif()
+
 ########################################################################
 # User input options                                                   #
 ########################################################################
@@ -76,7 +80,7 @@ add_definitions(-DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN})
 option(LAMMPS_EXCEPTIONS "enable the use of C++ exceptions for error messages (useful for library interface)" OFF)
 if(LAMMPS_EXCEPTIONS)
   add_definitions(-DLAMMPS_EXCEPTIONS)
-  set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES -DLAMMPS_EXCEPTIONS")
+  set(LAMMPS_API_DEFINES "${LAMMPS_API_DEFINES} -DLAMMPS_EXCEPTIONS")
 endif()
 
 set(LAMMPS_MACHINE "" CACHE STRING "Suffix to append to lmp binary and liblammps (WON'T enable any features automatically")
@@ -101,7 +105,7 @@ set(OTHER_PACKAGES KIM PYTHON MSCG MPIIO VORONOI POEMS LATTE
   USER-CGSDK USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF
   USER-FEP USER-H5MD USER-LB USER-MANIFOLD USER-MEAMC USER-MGPT USER-MISC
   USER-MOLFILE USER-NETCDF USER-PHONON USER-QTB USER-REAXC USER-SMD
-  USER-SMTBQ USER-SPH USER-TALLY USER-VTK USER-QUIP USER-QMMM)
+  USER-SMTBQ USER-SPH USER-TALLY USER-UEF USER-VTK USER-QUIP USER-QMMM)
 set(ACCEL_PACKAGES USER-OMP KOKKOS OPT USER-INTEL GPU)
 foreach(PKG ${DEFAULT_PACKAGES})
   option(ENABLE_${PKG} "Build ${PKG} Package" ${ENABLE_ALL})
@@ -665,7 +669,9 @@ include_directories(${LAMMPS_STYLE_HEADERS_DIR})
 ############################################
 add_library(lammps ${LIB_SOURCES})
 target_link_libraries(lammps ${LAMMPS_LINK_LIBS})
-add_dependencies(lammps ${LAMMPS_DEPS})
+if(LAMMPS_DEPS)
+  add_dependencies(lammps ${LAMMPS_DEPS})
+endif()
 set_target_properties(lammps PROPERTIES OUTPUT_NAME lammps${LAMMPS_MACHINE})
 if(BUILD_SHARED_LIBS)
   set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION})
diff --git a/doc/src/Eqs/bond_gromos.jpg b/doc/src/Eqs/bond_gromos.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..479e6b2d3b2ed907e9191564d8b8edbd42ea3f62
Binary files /dev/null and b/doc/src/Eqs/bond_gromos.jpg differ
diff --git a/doc/src/Eqs/bond_gromos.tex b/doc/src/Eqs/bond_gromos.tex
new file mode 100644
index 0000000000000000000000000000000000000000..2cd8c39535390e267735d2adca622468ae0decd1
--- /dev/null
+++ b/doc/src/Eqs/bond_gromos.tex
@@ -0,0 +1,10 @@
+\documentclass[12pt]{article}
+\pagestyle{empty}
+
+\begin{document}
+
+$$
+   E = K (r^2 - r_0^2)^2
+$$
+
+\end{document}
diff --git a/doc/src/Eqs/fix_rhok.jpg b/doc/src/Eqs/fix_rhok.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..829a866be449ecb0f23dc77f547c5b7d053c0469
Binary files /dev/null and b/doc/src/Eqs/fix_rhok.jpg differ
diff --git a/doc/src/Eqs/fix_rhok.tex b/doc/src/Eqs/fix_rhok.tex
new file mode 100644
index 0000000000000000000000000000000000000000..a468dfedc963e5d5616ca9baae019145bc421e51
--- /dev/null
+++ b/doc/src/Eqs/fix_rhok.tex
@@ -0,0 +1,11 @@
+\documentclass[12pt]{article}
+
+\begin{document}
+
+\begin{eqnarray*}
+ U &=&  \frac{1}{2} K (|\rho_{\vec{k}}| - a)^2 \\
+ \rho_{\vec{k}} &=& \sum_j^N \exp(-i\vec{k} \cdot \vec{r}_j )/\sqrt{N} \\
+ \vec{k} &=& (2\pi n_x /L_x , 2\pi n_y  /L_y , 2\pi n_z/L_z ) 
+\end{eqnarray*}
+
+\end{document}
diff --git a/doc/src/JPG/uef_frames.jpg b/doc/src/JPG/uef_frames.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3b3bfc3a245de1b00d594f865b9c93f1149deffd
Binary files /dev/null and b/doc/src/JPG/uef_frames.jpg differ
diff --git a/doc/src/JPG/user_intel.png b/doc/src/JPG/user_intel.png
index 7ec83b3207b06c4bbda7d56f2a7d9d94a15d115d..5061f1af2e26d9c2c1110390143d9ebf96946bd4 100755
Binary files a/doc/src/JPG/user_intel.png and b/doc/src/JPG/user_intel.png differ
diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index 303e951fabbc65a9904e5af9f1b589c39e8234c7..c31ec1758799000049c2825da2c413a2790002c3 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="22 Sep 2017 version">
+<META NAME="docnumber" CONTENT="23 Oct 2017 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@@ -21,7 +21,7 @@
 <H1></H1>
 
 LAMMPS Documentation :c,h3
-22 Sep 2017 version :c,h4
+23 Oct 2017 version :c,h4
 
 Version info: :h4
 
diff --git a/doc/src/PDF/colvars-refman-lammps.pdf b/doc/src/PDF/colvars-refman-lammps.pdf
index ad15752107cc006471cc4b3e7a400aa1658f26e2..daa1393269b953d569aff9846a135480dcdd42c7 100644
Binary files a/doc/src/PDF/colvars-refman-lammps.pdf and b/doc/src/PDF/colvars-refman-lammps.pdf differ
diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt
index 0d46a01424ff0c986e5f9137b995a65ab11cbc3a..e816c8831b7c1a2ca9a1f99039b1f6ced7d08997 100644
--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@@ -720,6 +720,8 @@ package"_Section_start.html#start_3.
 "nve/eff"_fix_nve_eff.html,
 "nvt/eff"_fix_nh_eff.html,
 "nvt/sllod/eff"_fix_nvt_sllod_eff.html,
+"npt/uef"_fix_nh_uef.html,
+"nvt/uef"_fix_nh_uef.html,
 "phonon"_fix_phonon.html,
 "pimd"_fix_pimd.html,
 "qbmsst"_fix_qbmsst.html,
@@ -728,6 +730,7 @@ package"_Section_start.html#start_3.
 "qtb"_fix_qtb.html,
 "reax/c/bonds"_fix_reax_bonds.html,
 "reax/c/species"_fix_reaxc_species.html,
+"rhok"_fix_rhok.html,
 "rx"_fix_rx.html,
 "saed/vtk"_fix_saed_vtk.html,
 "shardlow"_fix_shardlow.html,
@@ -856,6 +859,7 @@ package"_Section_start.html#start_3.
 "meso/t/atom"_compute_meso_t_atom.html,
 "pe/tally"_compute_tally.html,
 "pe/mol/tally"_compute_tally.html,
+"pressure/uef"_compute_pressure_uef.html,
 "saed"_compute_saed.html,
 "smd/contact/radius"_compute_smd_contact_radius.html,
 "smd/damage"_compute_smd_damage.html,
@@ -884,6 +888,7 @@ package"_Section_start.html#start_3.
 "temp/deform/eff"_compute_temp_deform_eff.html,
 "temp/region/eff"_compute_temp_region_eff.html,
 "temp/rotate"_compute_temp_rotate.html,
+"temp/uef"_compute_temp_uef.html,
 "xrd"_compute_xrd.html :tb(c=6,ea=c)
 
 :line
@@ -915,11 +920,12 @@ KOKKOS, o = USER-OMP, t = OPT.
 "born/coul/long/cs"_pair_born.html,
 "born/coul/msm (o)"_pair_born.html,
 "born/coul/wolf (go)"_pair_born.html,
+"born/coul/wolf/cs"_pair_born.html,
 "brownian (o)"_pair_brownian.html,
 "brownian/poly (o)"_pair_brownian.html,
-"buck (gkio)"_pair_buck.html,
-"buck/coul/cut (gkio)"_pair_buck.html,
-"buck/coul/long (gkio)"_pair_buck.html,
+"buck (giko)"_pair_buck.html,
+"buck/coul/cut (giko)"_pair_buck.html,
+"buck/coul/long (giko)"_pair_buck.html,
 "buck/coul/long/cs"_pair_buck.html,
 "buck/coul/msm (o)"_pair_buck.html,
 "buck/long/coul/long (o)"_pair_buck_long.html,
@@ -934,12 +940,13 @@ KOKKOS, o = USER-OMP, t = OPT.
 "coul/msm"_pair_coul.html,
 "coul/streitz"_pair_coul.html,
 "coul/wolf (ko)"_pair_coul.html,
-"dpd (go)"_pair_dpd.html,
+"coul/wolf/cs"_pair_coul.html,
+"dpd (gio)"_pair_dpd.html,
 "dpd/tstat (go)"_pair_dpd.html,
 "dsmc"_pair_dsmc.html,
-"eam (gkiot)"_pair_eam.html,
-"eam/alloy (gkiot)"_pair_eam.html,
-"eam/fs (gkiot)"_pair_eam.html,
+"eam (gikot)"_pair_eam.html,
+"eam/alloy (gikot)"_pair_eam.html,
+"eam/fs (gikot)"_pair_eam.html,
 "eim (o)"_pair_eim.html,
 "gauss (go)"_pair_gauss.html,
 "gayberne (gio)"_pair_gayberne.html,
@@ -953,9 +960,9 @@ KOKKOS, o = USER-OMP, t = OPT.
 "kim"_pair_kim.html,
 "lcbop"_pair_lcbop.html,
 "line/lj"_pair_line_lj.html,
-"lj/charmm/coul/charmm (kio)"_pair_charmm.html,
+"lj/charmm/coul/charmm (iko)"_pair_charmm.html,
 "lj/charmm/coul/charmm/implicit (ko)"_pair_charmm.html,
-"lj/charmm/coul/long (gkio)"_pair_charmm.html,
+"lj/charmm/coul/long (giko)"_pair_charmm.html,
 "lj/charmm/coul/msm"_pair_charmm.html,
 "lj/charmmfsw/coul/charmmfsh"_pair_charmm.html,
 "lj/charmmfsw/coul/long"_pair_charmm.html,
@@ -1005,9 +1012,9 @@ KOKKOS, o = USER-OMP, t = OPT.
 "resquared (go)"_pair_resquared.html,
 "snap"_pair_snap.html,
 "soft (go)"_pair_soft.html,
-"sw (gkio)"_pair_sw.html,
+"sw (giko)"_pair_sw.html,
 "table (gko)"_pair_table.html,
-"tersoff (gkio)"_pair_tersoff.html,
+"tersoff (giko)"_pair_tersoff.html,
 "tersoff/mod (gko)"_pair_tersoff_mod.html,
 "tersoff/mod/c (o)"_pair_tersoff_mod.html,
 "tersoff/zbl (gko)"_pair_tersoff_zbl.html,
@@ -1111,6 +1118,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "class2 (ko)"_bond_class2.html,
 "fene (iko)"_bond_fene.html,
 "fene/expand (o)"_bond_fene_expand.html,
+"gromos (o)"_bond_gromos.html,
 "harmonic (ko)"_bond_harmonic.html,
 "morse (o)"_bond_morse.html,
 "nonlinear (o)"_bond_nonlinear.html,
@@ -1177,7 +1185,7 @@ USER-OMP, t = OPT.
 "none"_dihedral_none.html,
 "zero"_dihedral_zero.html,
 "hybrid"_dihedral_hybrid.html,
-"charmm (ko)"_dihedral_charmm.html,
+"charmm (iko)"_dihedral_charmm.html,
 "charmmfsw"_dihedral_charmm.html,
 "class2 (ko)"_dihedral_class2.html,
 "harmonic (io)"_dihedral_harmonic.html,
@@ -1190,7 +1198,7 @@ used if "LAMMPS is built with the appropriate
 package"_Section_start.html#start_3.
 
 "cosine/shift/exp (o)"_dihedral_cosine_shift_exp.html,
-"fourier (o)"_dihedral_fourier.html,
+"fourier (io)"_dihedral_fourier.html,
 "nharmonic (o)"_dihedral_nharmonic.html,
 "quadratic (o)"_dihedral_quadratic.html,
 "spherical (o)"_dihedral_spherical.html,
@@ -1213,7 +1221,7 @@ USER-OMP, t = OPT.
 "hybrid"_improper_hybrid.html,
 "class2 (ko)"_improper_class2.html,
 "cvff (io)"_improper_cvff.html,
-"harmonic (ko)"_improper_harmonic.html,
+"harmonic (iko)"_improper_harmonic.html,
 "umbrella (o)"_improper_umbrella.html :tb(c=4,ea=c)
 
 These are additional improper styles in USER packages, which can be
diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt
index d9a9fb4163a3d0186b2d803195b72be0e57ccf3d..912d371cd9451966dbe034c5cca214919e3062b6 100644
--- a/doc/src/Section_packages.txt
+++ b/doc/src/Section_packages.txt
@@ -150,6 +150,7 @@ Package, Description, Doc page, Example, Library
 "USER-SMTBQ"_#USER-SMTBQ, second moment tight binding QEq potential,"pair_style smtbq"_pair_smtbq.html, USER/smtbq, -
 "USER-SPH"_#USER-SPH, smoothed particle hydrodynamics,"SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, -
 "USER-TALLY"_#USER-TALLY, pairwise tally computes,"compute XXX/tally"_compute_tally.html, USER/tally, -
+"USER-UEF"_#USER-UEF, extensional flow,"fix nvt/uef"_fix_nh_uef.html, USER/uef, -
 "USER-VTK"_#USER-VTK, dump output via VTK, "compute vtk"_dump_vtk.html, -, ext :tb(ea=c,ca1=l)
 
 :line
@@ -705,7 +706,7 @@ dynamics can be run with LAMMPS using density-functional tight-binding
 quantum forces calculated by LATTE.
 
 More information on LATTE can be found at this web site:
-"https://github.com/lanl/LATTE"_#latte_home.  A brief technical
+"https://github.com/lanl/LATTE"_latte_home.  A brief technical
 description is given with the "fix latte"_fix_latte.html command.
 
 :link(latte_home,https://github.com/lanl/LATTE)
@@ -728,6 +729,7 @@ make lib-latte args="-b"                # download and build in lib/latte/LATTE-
 make lib-latte args="-p $HOME/latte"    # use existing LATTE installation in $HOME/latte
 make lib-latte args="-b -m gfortran"    # download and build in lib/latte and 
                                         #   copy Makefile.lammps.gfortran to Makefile.lammps
+:pre
 
 Note that 3 symbolic (soft) links, "includelink" and "liblink" and
 "filelink", are created in lib/latte to point into the LATTE home dir.
@@ -2770,13 +2772,44 @@ examples/USER/tally :ul
 
 :line
 
+USER-UEF package :link(USER-UEF),h4
+
+[Contents:]
+
+A fix style for the integration of the equations of motion under
+extensional flow with proper boundary conditions, as well as several
+supporting compute styles and an output option.
+
+[Author:] David Nicholson (MIT).
+
+[Install or un-install:]
+
+make yes-user-uef
+make machine :pre
+
+make no-user-uef
+make machine :pre
+
+[Supporting info:]
+
+src/USER-UEF: filenames -> commands
+src/USER-UEF/README
+"fix nvt/uef"_fix_nh_uef.html
+"fix npt/uef"_fix_nh_uef.html
+"compute pressure/uef"_compute_pressure_uef.html
+"compute temp/uef"_compute_temp_uef.html
+"dump cfg/uef"_dump_cfg_uef.html
+examples/USER/uef :ul
+
+:line
+
 USER-VTK package :link(USER-VTK),h4
 
 [Contents:]
 
-A "dump vtk"_dump_vtk.html command which outputs
-snapshot info in the "VTK format"_vtk, enabling visualization by
-"Paraview"_paraview or other visuzlization packages.
+A "dump vtk"_dump_vtk.html command which outputs snapshot info in the
+"VTK format"_vtk, enabling visualization by "Paraview"_paraview or
+other visualization packages.
 
 :link(vtk,http://www.vtk.org)
 :link(paraview,http://www.paraview.org)
diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt
index 83e17b4f2769522e53d125cb7249801cfcc2932a..aaa38d7de28a218d85952dfb5de3dcb6c2de1095 100644
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@@ -25,14 +25,14 @@ LAMMPS to run on the CPU cores and coprocessor cores simultaneously.
 [Currently Available USER-INTEL Styles:]
 
 Angle Styles: charmm, harmonic :ulb,l
-Bond Styles: fene, harmonic :l
+Bond Styles: fene, fourier, harmonic :l
 Dihedral Styles: charmm, harmonic, opls :l
-Fixes: nve, npt, nvt, nvt/sllod :l
+Fixes: nve, npt, nvt, nvt/sllod, nve/asphere :l
 Improper Styles: cvff, harmonic :l
 Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long, 
-buck, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, 
-lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo,
-sw, tersoff :l
+buck, dpd, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, 
+lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, 
+rebo, sw, tersoff :l
 K-Space Styles: pppm, pppm/disp :l
 :ule
 
@@ -54,11 +54,12 @@ warmup run (for use with offload benchmarks).
 :c,image(JPG/user_intel.png)
 
 Results are speedups obtained on Intel Xeon E5-2697v4 processors
-(code-named Broadwell) and Intel Xeon Phi 7250 processors
-(code-named Knights Landing) with "June 2017" LAMMPS built with
-Intel Parallel Studio 2017 update 2. Results are with 1 MPI task
-per physical core. See {src/USER-INTEL/TEST/README} for the raw
-simulation rates and instructions to reproduce.
+(code-named Broadwell), Intel Xeon Phi 7250 processors (code-named
+Knights Landing), and Intel Xeon Gold 6148 processors (code-named
+Skylake) with "June 2017" LAMMPS built with Intel Parallel Studio
+2017 update 2. Results are with 1 MPI task per physical core. See
+{src/USER-INTEL/TEST/README} for the raw simulation rates and
+instructions to reproduce.
 
 :line
 
@@ -82,6 +83,11 @@ this order :l
 The {newton} setting applies to all atoms, not just atoms shared
 between MPI tasks :l
 Vectorization can change the order for adding pairwise forces :l
+When using the -DLMP_USE_MKL_RNG define (all included intel optimized
+makefiles do) at build time, the random number generator for
+dissipative particle dynamics (pair style dpd/intel) uses the Mersenne
+Twister generator included in the Intel MKL library (that should be
+more robust than the default Marsaglia random number generator) :l
 :ule
 
 The precision mode (described below) used with the USER-INTEL
@@ -108,7 +114,7 @@ $t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
 For some of the simple 2-body potentials without long-range
 electrostatics, performance and scalability can be better with
 the "newton off" setting added to the input script :l
-For simulations on higher node counts, add "processors * * * grid 
+For simulations on higher node counts, add "processors * * * grid
 numa" to the beginning of the input script for better scalability :l
 If using {kspace_style pppm} in the input script, add
 "kspace_modify diff ad" for better performance :l
@@ -119,8 +125,8 @@ For Intel Xeon Phi CPUs:
 Runs should be performed using MCDRAM. :ulb,l
 :ule
 
-For simulations using {kspace_style pppm} on Intel CPUs
-supporting AVX-512:
+For simulations using {kspace_style pppm} on Intel CPUs supporting
+AVX-512:
 
 Add "kspace_modify diff ad" to the input script :ulb,l
 The command-line option should be changed to
@@ -237,14 +243,17 @@ However, if you do not have coprocessors on your system, building
 without offload support will produce a smaller binary.
 
 The general requirements for Makefiles with the USER-INTEL package
-are as follows. "-DLAMMPS_MEMALIGN=64" is required for CCFLAGS. When
-using Intel compilers, "-restrict" is required and "-qopenmp" is
-highly recommended for CCFLAGS and LINKFLAGS. LIB should include
-"-ltbbmalloc". For builds supporting offload, "-DLMP_INTEL_OFFLOAD"
-is required for CCFLAGS and "-qoffload" is required for LINKFLAGS.
-Other recommended CCFLAG options for best performance are
-"-O2 -fno-alias -ansi-alias -qoverride-limits fp-model fast=2
--no-prec-div".
+are as follows. When using Intel compilers, "-restrict" is required 
+and "-qopenmp" is highly recommended for CCFLAGS and LINKFLAGS. 
+CCFLAGS should include "-DLMP_INTEL_USELRT" (unless POSIX Threads
+are not supported in the build environment) and "-DLMP_USE_MKL_RNG"
+(unless Intel Math Kernel Library (MKL) is not available in the build
+environment). For Intel compilers, LIB should include "-ltbbmalloc" 
+or if the library is not available, "-DLMP_INTEL_NO_TBB" can be added
+to CCFLAGS. For builds supporting offload, "-DLMP_INTEL_OFFLOAD" is
+required for CCFLAGS and "-qoffload" is required for LINKFLAGS. Other
+recommended CCFLAG options for best performance are "-O2 -fno-alias
+-ansi-alias -qoverride-limits -fp-model fast=2 -no-prec-div".
 
 NOTE: The vectorization and math capabilities can differ depending on
 the CPU. For Intel compilers, the "-x" flag specifies the type of
diff --git a/doc/src/atom_modify.txt b/doc/src/atom_modify.txt
index d5c82f16ac951ebd86c3194658bfcc0826c1cb9f..1dc0fa6bfb6410df5eaef9abf35cebb3a66749fd 100644
--- a/doc/src/atom_modify.txt
+++ b/doc/src/atom_modify.txt
@@ -16,7 +16,7 @@ atom_modify keyword values ... :pre
 one or more keyword/value pairs may be appended :ulb,l
 keyword = {id} or {map} or {first} or {sort} :l
    {id} value = {yes} or {no}
-   {map} value = {array} or {hash}
+   {map} value = {yes} or {array} or {hash}
    {first} value = group-ID = group whose atoms will appear first in internal atom lists
    {sort} values = Nfreq binsize
      Nfreq = sort atoms spatially every this many time steps
@@ -25,8 +25,8 @@ keyword = {id} or {map} or {first} or {sort} :l
 
 [Examples:]
 
-atom_modify map hash
-atom_modify map array sort 10000 2.0
+atom_modify map yes
+atom_modify map hash sort 10000 2.0
 atom_modify first colloid :pre
 
 [Description:]
@@ -62,29 +62,33 @@ switch.  This is described in "Section 2.2"_Section_start.html#start_2
 of the manual.  If atom IDs are not used, they must be specified as 0
 for all atoms, e.g. in a data or restart file.
 
-The {map} keyword determines how atom ID lookup is done for molecular
-atom styles.  Lookups are performed by bond (angle, etc) routines in
-LAMMPS to find the local atom index associated with a global atom ID.
-
-When the {array} value is used, each processor stores a lookup table
-of length N, where N is the largest atom ID in the system.  This is a
+The {map} keyword determines how atoms with specific IDs are found
+when required.  Examples are the bond (angle, etc) methods which
+need to find the local index of an atom with a specific global ID
+which is a bond (angle, etc) partner.  LAMMPS performs this operation
+efficiently by creating a "map", which is either an {array} or {hash}
+table, as described below.
+
+When the {map} keyword is not specified in your input script, LAMMPS
+only creates a map for "atom_styles"_atom_style.html for molecular
+systems which have permanent bonds (angles, etc).  No map is created
+for atomic systems, since it is normally not needed.  However some
+LAMMPS commands require a map, even for atomic systems, and will
+generate an error if one does not exist.  The {map} keyword thus
+allows you to force the creation of a map.  The {yes} value will
+create either an {array} or {hash} style map, as explained in the next
+paragraph.  The {array} and {hash} values create an array-style or
+hash-style map respectively.
+
+For an {array}-style map, each processor stores a lookup table of
+length N, where N is the largest atom ID in the system.  This is a
 fast, simple method for many simulations, but requires too much memory
-for large simulations.  The {hash} value uses a hash table to perform
-the lookups.  This can be slightly slower than the {array} method, but
-its memory cost is proportional to the number of atoms owned by a
-processor, i.e. N/P when N is the total number of atoms in the system
-and P is the number of processors.
-
-When this setting is not specified in your input script, LAMMPS
-creates a map, if one is needed, as an array or hash.  See the
-discussion of default values below for how LAMMPS chooses which kind
-of map to build.  Note that atomic systems do not normally need to
-create a map.  However, even in this case some LAMMPS commands will
-create a map to find atoms (and then destroy it), or require a
-permanent map.  An example of the former is the "velocity loop
-all"_velocity.html command, which uses a map when looping over all
-atoms and insuring the same velocity values are assigned to an atom
-ID, no matter which processor owns it.
+for large simulations.  For a {hash}-style map, a hash table is
+created on each processor, which finds an atom ID in constant time
+(independent of the global number of atom IDs).  It can be slightly
+slower than the {array} map, but its memory cost is proportional to
+the number of atoms owned by a processor, i.e. N/P when N is the total
+number of atoms in the system and P is the number of processors.
 
 The {first} keyword allows a "group"_group.html to be specified whose
 atoms will be maintained as the first atoms in each processor's list
diff --git a/doc/src/bond_gromos.txt b/doc/src/bond_gromos.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cc3ff75878f36dd27f2d85b61ae3bfb522f52583
--- /dev/null
+++ b/doc/src/bond_gromos.txt
@@ -0,0 +1,73 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+bond_style gromos command :h3
+bond_style gromos/omp command :h3
+
+[Syntax:]
+
+bond_style gromos :pre
+
+[Examples:]
+
+bond_style gromos
+bond_coeff 5 80.0 1.2 :pre
+
+[Description:]
+
+The {gromos} bond style uses the potential
+
+:c,image(Eqs/bond_gromos.jpg)
+
+where r0 is the equilibrium bond distance.  Note that the usual 1/4
+factor is included in K.
+
+The following coefficients must be defined for each bond type via the
+"bond_coeff"_bond_coeff.html command as in the example above, or in
+the data file or restart files read by the "read_data"_read_data.html
+or "read_restart"_read_restart.html commands:
+
+K (energy/distance^4)
+r0 (distance) :ul
+
+:line
+
+Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
+functionally the same as the corresponding style without the suffix.
+They have been optimized to run faster, depending on your available
+hardware, as discussed in "Section 5"_Section_accelerate.html
+of the manual.  The accelerated styles take the same arguments and
+should produce the same results, except for round-off and precision
+issues.
+
+These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
+USER-OMP and OPT packages, respectively.  They are only enabled if
+LAMMPS was built with those packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+You can specify the accelerated styles explicitly in your input script
+by including their suffix, or you can use the "-suffix command-line
+switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
+use the "suffix"_suffix.html command in your input script.
+
+See "Section 5"_Section_accelerate.html of the manual for
+more instructions on how to use the accelerated styles effectively.
+
+:line
+
+[Restrictions:]
+
+This bond style can only be used if LAMMPS was built with the
+MOLECULE package.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info on packages.
+
+[Related commands:]
+
+"bond_coeff"_bond_coeff.html, "delete_bonds"_delete_bonds.html
+
+[Default:] none
diff --git a/doc/src/bonds.txt b/doc/src/bonds.txt
index 169d56ecbe4e7c75fce57abd58078dff9404a0cb..d33515eb88e36c3aca1525e5426a6cc80cbdeae8 100644
--- a/doc/src/bonds.txt
+++ b/doc/src/bonds.txt
@@ -8,6 +8,7 @@ Bond Styles :h1
    bond_class2
    bond_fene
    bond_fene_expand
+   bond_gromos
    bond_harmonic
    bond_harmonic_shift
    bond_harmonic_shift_cut
diff --git a/doc/src/commands.txt b/doc/src/commands.txt
index 06752f6960af88e82a389977681e81a29ac89ba7..5fb06f20118156b03bef56819c9a47442d5a30af 100644
--- a/doc/src/commands.txt
+++ b/doc/src/commands.txt
@@ -32,6 +32,7 @@ Commands :h1
    dimension
    displace_atoms
    dump
+   dump_cfg_uef
    dump_h5md
    dump_image
    dump_modify
diff --git a/doc/src/compute_pressure_uef.txt b/doc/src/compute_pressure_uef.txt
new file mode 100644
index 0000000000000000000000000000000000000000..065fc044417601a052d0d9fd060ababbf9aa405c
--- /dev/null
+++ b/doc/src/compute_pressure_uef.txt
@@ -0,0 +1,61 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+compute pressure/uef command :h3
+
+[Syntax:]
+
+compute ID group-ID pressure/uef temp-ID keyword ... :pre
+
+ID, group-ID are documented in "compute"_compute.html command
+pressure/uef = style name of this compute command
+temp-ID = ID of compute that calculates temperature, can be NULL if not needed
+zero or more keywords may be appended
+keyword = {ke} or {pair} or {bond} or {angle} or {dihedral} or {improper} or {kspace} or {fix} or {virial} :ul
+
+[Examples:]
+
+compute 1 all pressure/uef my_temp_uef
+compute 2 all pressure/uef my_temp_uef virial :pre
+
+[Description:]
+
+This command is used to compute the pressure tensor in
+the reference frame of the applied flow field when
+"fix nvt/uef"_fix_nh_uef.html or
+"fix npt/uef"_fix_nh_uef.html is used.
+It is not necessary to use this command to compute the scalar
+value of the pressure. A "compute pressure"_compute_pressure.html
+may be used for that purpose.
+
+The keywords and output information are documented in 
+"compute_pressure"_compute_pressure.html.
+
+[Restrictions:]
+
+This compute is part of the USER-UEF package. It is only enabled if
+LAMMPS was built with that package. See the
+"Making LAMMPS"_Section_start.html#start_3 section for more info.
+
+This command can only be used when "fix nvt/uef"_fix_nh_uef.html
+or "fix npt/uef"_fix_nh_uef.html is active.
+
+The kinetic contribution to the pressure tensor
+will be accurate only when 
+the compute specified by {temp-ID} is a
+"compute temp/uef"_compute_temp_uef.html.
+
+[Related commands:]
+
+"compute pressure"_compute_pressure.html,
+"fix nvt/uef"_fix_nh_uef.html,
+"compute temp/uef"_compute_temp_uef.html
+
+[Default:] none
+
+
diff --git a/doc/src/compute_temp_uef.txt b/doc/src/compute_temp_uef.txt
new file mode 100644
index 0000000000000000000000000000000000000000..acd3a6218d9e39d25a73be4d7562ee8127ae9cfa
--- /dev/null
+++ b/doc/src/compute_temp_uef.txt
@@ -0,0 +1,52 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+compute temp/uef command :h3
+
+[Syntax:]
+
+compute ID group-ID temp/uef :pre
+
+ID, group-ID are documented in "compute"_compute.html command
+temp/uef = style name of this compute command :ul
+
+[Examples:]
+
+compute 1 all temp/uef 
+compute 2 sel temp/uef :pre
+
+[Description:]
+
+This command is used to compute the kinetic energy tensor in
+the reference frame of the applied flow field when
+"fix nvt/uef"_fix_nh_uef.html or
+"fix npt/uef"_fix_nh_uef.html is used.
+It is not necessary to use this command to compute the scalar
+value of the temperature. A "compute temp"_compute_temp.html 
+may be used for that purpose.
+
+Output information for this command can be found in the
+documentation for "compute temp"_compute_temp.html.
+
+[Restrictions:]
+
+This compute is part of the USER-UEF package. It is only enabled if
+LAMMPS was built with that package. See the 
+"Making LAMMPS"_Section_start.html#start_3 section for more info.
+
+This command can only be used when "fix nvt/uef"_fix_nh_uef.html 
+or "fix npt/uef"_fix_nh_uef.html is active.
+
+[Related commands:]
+
+"compute temp"_compute_temp.html,
+"fix nvt/uef"_fix_nh_uef.html,
+"compute pressure/uef"_compute_pressure_uef.html
+
+
+[Default:] none
diff --git a/doc/src/computes.txt b/doc/src/computes.txt
index c443bfaba2376bd5244ad0eef735d72fb9a388f6..1b64e2e5b46393313fc74099ef9bbf04da946b74 100644
--- a/doc/src/computes.txt
+++ b/doc/src/computes.txt
@@ -65,6 +65,7 @@ Computes :h1
    compute_pe_atom
    compute_plasticity_atom
    compute_pressure
+   compute_pressure_uef
    compute_property_atom
    compute_property_chunk
    compute_property_local
@@ -114,6 +115,7 @@ Computes :h1
    compute_temp_region_eff
    compute_temp_rotate
    compute_temp_sphere
+   compute_temp_uef
    compute_ti
    compute_torque_chunk
    compute_vacf
diff --git a/doc/src/dihedral_fourier.txt b/doc/src/dihedral_fourier.txt
index da892b59daaf8eda8e540f9a26f850e55877b4aa..0accbb22bf80bfb89e1b77bd7fa8760e56ad3e55 100644
--- a/doc/src/dihedral_fourier.txt
+++ b/doc/src/dihedral_fourier.txt
@@ -7,6 +7,7 @@
 :line
 
 dihedral_style fourier command :h3
+dihedral_style fourier/intel command :h3
 dihedral_style fourier/omp command :h3
 
 [Syntax:]
diff --git a/doc/src/dump_cfg_uef.txt b/doc/src/dump_cfg_uef.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e257f9c4f16d2f768ee4e688f3d71e0a0dd0c1cd
--- /dev/null
+++ b/doc/src/dump_cfg_uef.txt
@@ -0,0 +1,53 @@
+ "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+dump cfg/uef command :h3
+
+[Syntax:]
+
+dump ID group-ID cfg/uef N file mass type xs ys zs args :pre
+
+ID = user-assigned name for the dump :ulb,l
+group-ID = ID of the group of atoms to be dumped :l
+N = dump every this many timesteps :l
+file = name of file to write dump info to :l
+args = same as args for "dump custom"_dump.html :pre
+
+:ule
+
+[Examples:]
+
+dump 1 all cfg/uef 10 dump.*.cfg mass type xs ys zs
+dump 2 all cfg/uef 100 dump.*.cfg mass type xs ys zs id c_stress :pre
+
+[Description:]
+
+This command is used to dump atomic coordinates in the
+reference frame of the applied flow field when
+"fix nvt/uef"_fix_nh_uef.html or
+"fix npt/uef"_fix_nh_uef.html is used. Only the atomic
+coordinates and frame-invariant scalar quantities 
+will be in the flow frame. If velocities are selected
+as output, for example, they will not be in the same
+reference frame as the atomic positions.
+
+[Restrictions:]
+
+This dump style is part of the USER-UEF package. It is only enabled if
+LAMMPS was built with that package. See the
+"Making LAMMPS"_Section_start.html#start_3 section for more info.
+
+This command can only be used when "fix nvt/uef"_fix_nh_uef.html
+or "fix npt/uef"_fix_nh_uef.html is active.
+
+[Related commands:]
+
+"dump"_dump.html,
+"fix nvt/uef"_fix_nh_uef.html
+
+[Default:] none
diff --git a/doc/src/dump_modify.txt b/doc/src/dump_modify.txt
index 2ea1da3db3b6295531b913bc9e25665a9c4c7ba7..db727c2d4f7bdec779f196524a71a7dfd2ea3193 100644
--- a/doc/src/dump_modify.txt
+++ b/doc/src/dump_modify.txt
@@ -15,8 +15,9 @@ dump_modify dump-ID keyword values ... :pre
 dump-ID = ID of dump to modify :ulb,l
 one or more keyword/value pairs may be appended :l
 these keywords apply to various dump styles :l
-keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l
-  {append} arg = {yes} or {no} or {at} N
+keyword = {append} or {at} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l
+  {append} arg = {yes} or {no}
+  {at} arg = N
     N = index of frame written upon first dump
   {buffer} arg = {yes} or {no}
   {element} args = E1 E2 ... EN, where N = # of atom types
@@ -141,13 +142,18 @@ and {dcd}.  It also applies only to text output files, not to binary
 or gzipped or image/movie files.  If specified as {yes}, then dump
 snapshots are appended to the end of an existing dump file.  If
 specified as {no}, then a new dump file will be created which will
-overwrite an existing file with the same name.  If the {at} option is present
-({netcdf} only), then the frame to append to can be specified.  Negative values
-are counted from the end of the file.  This keyword can only take effect if the
-dump_modify command is used after the "dump"_dump.html command, but before the
-first command that causes dump snapshots to be output, e.g. a "run"_run.html or
-"minimize"_minimize.html command.  Once the dump file has been opened, this
-keyword has no further effect.
+overwrite an existing file with the same name.
+
+:line
+
+The {at} keyword only applies to the {netcdf} dump style.  It can only
+be used if the {append yes} keyword is also used.  The {N} argument is
+the index of which frame to append to.  A negative value can be
+specified for {N}, which means a frame counted from the end of the
+file.  The {at} keyword can only be used if the dump_modify command is
+before the first command that causes dump snapshots to be output,
+e.g. a "run"_run.html or "minimize"_minimize.html command.  Once the
+dump file has been opened, this keyword has no further effect.
 
 :line
 
diff --git a/doc/src/dump_netcdf.txt b/doc/src/dump_netcdf.txt
index 63568137a65ec5d6891db69a9d7cf33d1be1a098..70111a36a8b28d9654db6779426090b7e37b507d 100644
--- a/doc/src/dump_netcdf.txt
+++ b/doc/src/dump_netcdf.txt
@@ -25,7 +25,8 @@ args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule
 
 dump 1 all netcdf 100 traj.nc type x y z vx vy vz
 dump_modify 1 append yes at -1 thermo yes
-dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre
+dump 1 all netcdf/mpiio 1000 traj.nc id type x y z
+dump 1 all netcdf 1000 traj.*.nc id type x y z :pre
 
 [Description:]
 
@@ -73,4 +74,3 @@ section for more info.
 [Related commands:]
 
 "dump"_dump.html, "dump_modify"_dump_modify.html, "undump"_undump.html
-
diff --git a/doc/src/fix_deform.txt b/doc/src/fix_deform.txt
index 63d872eded4d2e4c6a70a936e427d899115e3463..c870c73bdccf918288589c5141ba4dbafcc60434 100644
--- a/doc/src/fix_deform.txt
+++ b/doc/src/fix_deform.txt
@@ -86,11 +86,16 @@ Change the volume and/or shape of the simulation box during a dynamics
 run.  Orthogonal simulation boxes have 3 adjustable parameters
 (x,y,z).  Triclinic (non-orthogonal) simulation boxes have 6
 adjustable parameters (x,y,z,xy,xz,yz).  Any or all of them can be
-adjusted independently and simultaneously by this command.  This fix
-can be used to perform non-equilibrium MD (NEMD) simulations of a
-continuously strained system.  See the "fix
+adjusted independently and simultaneously by this command.  
+
+This fix can be used to perform non-equilibrium MD (NEMD) simulations
+of a continuously strained system.  See the "fix
 nvt/sllod"_fix_nvt_sllod.html and "compute
-temp/deform"_compute_temp_deform.html commands for more details.
+temp/deform"_compute_temp_deform.html commands for more details.  Note
+that simulation of a continuously extended system (extensional flow)
+can be modeled using the "USER-UEF
+package"_Section_packages.html#USER-UEF and its "fix
+commands"_fix_nh_uef.html.
 
 For the {x}, {y}, {z} parameters, the associated dimension cannot be
 shrink-wrapped.  For the {xy}, {yz}, {xz} parameters, the associated
diff --git a/doc/src/fix_latte.txt b/doc/src/fix_latte.txt
index f78e13b866560403a2dd7276005c1fc698796377..4edd610546350ac082789fb517af918c7167c1f3 100644
--- a/doc/src/fix_latte.txt
+++ b/doc/src/fix_latte.txt
@@ -66,7 +66,7 @@ reference charge of overlapping atom-centered densities and bond
 integrals are parameterized using a Slater-Koster tight-binding
 approach. This procedure, which usually is referred to as the DFTB
 method has been described in detail by ("Elstner"_#Elstner) and
-("Finnis"_#Finnis) and coworkers. 
+("Finnis"_#Finnis2) and coworkers. 
 
 The work of the LATTE developers follows that of Elstner closely with
 respect to the physical model.  However, the development of LATTE is
@@ -173,7 +173,7 @@ M. Haugk, T. Frauenheim, S. Suhai, and G. Seifert, Phys. Rev. B, 58,
 M. Haugk, T. Frauenheim, S. Suhai, and G. Seifert, Phys. Rev. B, 58,
 7260 (1998).
 
-:link(Finnis)
+:link(Finnis2)
 [(Finnis)] M. W. Finnis, A. T. Paxton, M. Methfessel, and M. van
 Schilfgarde, Phys. Rev. Lett., 81, 5149 (1998).
 
@@ -197,11 +197,11 @@ J. Sci. Comput. 36 (2), 147-170, (2014).
 [(Niklasson2014)] A. M. N. Niklasson and M. Cawkwell, J. Chem. Phys.,
 141, 164123, (2014).
 
-:link(Niklasson2014)
+:link(Niklasson2017)
 [(Niklasson2017)] A. M. N. Niklasson, J. Chem. Phys., 147, 054103 (2017).
 
-:link(Niklasson2012)
-[(Niklasson2017)] A. M. N. Niklasson, M. J. Cawkwell, Phys. Rev. B, 86
+:link(Cawkwell2012)
+[(Cawkwell2012)] A. M. N. Niklasson, M. J. Cawkwell, Phys. Rev. B, 86
 (17), 174308 (2012).
 
 :link(Negre2016)
diff --git a/doc/src/fix_neb.txt b/doc/src/fix_neb.txt
index 52d8a7df84da725f0c37af433966446cf2720604..73b3e312665785ed7420f9b8ceb557e95be61bc4 100644
--- a/doc/src/fix_neb.txt
+++ b/doc/src/fix_neb.txt
@@ -93,7 +93,7 @@ intermediate replica with the previous and the next image:
 
 Fnudge_parallel = {Kspring} * (|Ri+1 - Ri| - |Ri - Ri-1|) :pre
 
-Note that in this case the specified {Kspring) is in force/distance
+Note that in this case the specified {Kspring} is in force/distance
 units.
 
 With a value of {ideal}, the spring force is computed as suggested in
@@ -105,7 +105,7 @@ where RD is the "reaction coordinate" see "neb"_neb.html section, and
 RDideal is the ideal RD for which all the images are equally spaced.
 I.e. RDideal = (I-1)*meanDist when the climbing replica is off, where
 I is the replica number).  The meanDist is the average distance
-between replicas.  Note that in this case the specified {Kspring) is
+between replicas.  Note that in this case the specified {Kspring} is
 in force units.
 
 Note that the {ideal} form of nudging can often be more effective at
diff --git a/doc/src/fix_nh.txt b/doc/src/fix_nh.txt
index 8fa30ac22289d7fe39ff6f1d0c314990dfb52c79..41d0e6438fc9e05d08aa43d1548a98c0856ffd90 100644
--- a/doc/src/fix_nh.txt
+++ b/doc/src/fix_nh.txt
@@ -393,32 +393,36 @@ thermostatting and barostatting.
 :line
 
 These fixes compute a temperature and pressure each timestep.  To do
-this, the fix creates its own computes of style "temp" and "pressure",
-as if one of these two sets of commands had been issued:
+this, the thermostat and barostat fixes create their own computes of
+style "temp" and "pressure", as if one of these sets of commands had
+been issued:
 
+For fix nvt:
 compute fix-ID_temp group-ID temp
-compute fix-ID_press group-ID pressure fix-ID_temp :pre
 
+For fix npt and fix nph:
 compute fix-ID_temp all temp
 compute fix-ID_press all pressure fix-ID_temp :pre
 
-See the "compute temp"_compute_temp.html and "compute
-pressure"_compute_pressure.html commands for details.  Note that the
-IDs of the new computes are the fix-ID + underscore + "temp" or fix_ID
-+ underscore + "press".  For fix nvt, the group for the new computes
-is the same as the fix group.  For fix nph and fix npt, the group for
-the new computes is "all" since pressure is computed for the entire
-system.
+For fix nvt, the group for the new temperature compute is the same as
+the fix group.  For fix npt and fix nph, the group for both the new
+temperature and pressure compute is "all" since pressure is computed
+for the entire system.  In the case of fix nph, the temperature
+compute is not used for thermostatting, but just for a kinetic-energy
+contribution to the pressure.  See the "compute
+temp"_compute_temp.html and "compute pressure"_compute_pressure.html
+commands for details.  Note that the IDs of the new computes are the
+fix-ID + underscore + "temp" or fix-ID + underscore + "press".
 
 Note that these are NOT the computes used by thermodynamic output (see
 the "thermo_style"_thermo_style.html command) with ID = {thermo_temp}
-and {thermo_press}.  This means you can change the attributes of this
+and {thermo_press}.  This means you can change the attributes of these
 fix's temperature or pressure via the
-"compute_modify"_compute_modify.html command or print this temperature
-or pressure during thermodynamic output via the "thermo_style
-custom"_thermo_style.html command using the appropriate compute-ID.
-It also means that changing attributes of {thermo_temp} or
-{thermo_press} will have no effect on this fix.
+"compute_modify"_compute_modify.html command.  Or you can print this
+temperature or pressure during thermodynamic output via the
+"thermo_style custom"_thermo_style.html command using the appropriate
+compute-ID.  It also means that changing attributes of {thermo_temp}
+or {thermo_press} will have no effect on this fix.
 
 Like other fixes that perform thermostatting, fix nvt and fix npt can
 be used with "compute commands"_compute.html that calculate a
diff --git a/doc/src/fix_nh_uef.txt b/doc/src/fix_nh_uef.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bde1818371f2c4bcab0bafe11e19c4b04df107b8
--- /dev/null
+++ b/doc/src/fix_nh_uef.txt
@@ -0,0 +1,228 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix nvt/uef command :h3
+fix npt/uef command :h3
+
+[Syntax:]
+
+fix ID group-ID style_name erate edot_x edot_y temp Tstart Tstop Tdamp keyword value ... :pre
+
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+style_name = {nvt/uef} or {npt/uef} :l
+{Tstart}, {Tstop}, and {Tdamp} are documented in the "fix npt"_fix_nh.html command :l
+{edot_x} and {edot_y} are the strain rates in the x and y directions (1/(time units)) :l
+one or more keyword/value pairs may be appended :l
+keyword = {ext} or {strain} or {iso} or {x} or {y} or {z} or {tchain} or {pchain} or {tloop} or {ploop} or {mtk}
+  {ext} value = {x} or {y} or {z} or {xy} or {yz} or {xz} = external dimensions
+    sets the external dimensions used to calculate the scalar pressure
+  {strain} values = e_x e_y = initial strain
+    usually not needed, but may be needed to resume a run with a data file.
+  {iso}, {x}, {y}, {z}, {tchain}, {pchain}, {tloop}, {ploop}, {mtk} keywords
+    documented by the "fix npt"_fix_nh.html command :pre
+:ule
+
+[Examples:]
+
+fix uniax_nvt all nvt/uef temp 400 400 100 erate 0.00001 -0.000005
+fix biax_nvt all nvt/uef temp 400 400 100 erate 0.000005 0.000005
+fix uniax_npt all npt/uef temp 400 400 300 iso 1 1 3000 erate 0.00001 -0.000005 ext yz
+fix biax_npt all npt/uef temp 400 400 100 erate -0.00001 0.000005 x 1 1 3000 :pre
+
+[Description:]
+
+This fix can be used to simulate non-equilibrium molecular dynamics
+(NEMD) under diagonal flow fields, including uniaxial and biaxial
+flow.  Simulations under continuous extensional flow may be carried
+out for an indefinite amount of time.  It is an implementation of the
+boundary conditions from "(Dobson)"_#Dobson, and also uses numerical
+lattice reduction as was proposed by "(Hunt)"_#Hunt. The lattice
+reduction algorithm is from "(Semaev)"_#Semaev. The fix is intended for
+simulations of homogeneous flows, and integrates the SLLOD equations
+of motion, originally proposed by Hoover and Ladd (see "(Evans and
+Morriss)"_#Sllod).  Additional detail about this implementation can be
+found in "(Nicholson and Rutledge)"_#Nicholson.
+
+Note that NEMD simulations of a continuously strained system can be
+performed using the "fix deform"_fix_deform.html, "fix
+nvt/sllod"_fix_nvt_sllod.html, and "compute
+temp/deform"_compute_temp_deform.html commands.
+
+The applied flow field is set by the {erate} keyword. The values
+{edot_x} and {edot_y} correspond to the strain rates in the xx and yy
+directions.  It is implicitly assumed that the flow field is
+traceless, and therefore the strain rate in the zz direction is equal
+to -({edot_x} + {edot_y}).
+
+NOTE: Due to an instability in the SLLOD equations under extension,
+"fix momentum"_fix_momentum.html should be used to regularly reset the
+linear momentum.
+
+The boundary conditions require a simulation box that does not have a
+consistent alignment relative to the applied flow field. Since LAMMPS
+utilizes an upper-triangular simulation box, it is not possible to
+express the evolving simulation box in the same coordinate system as
+the flow field.  This fix keeps track of two coordinate systems: the
+flow frame, and the upper triangular LAMMPS frame. The coordinate
+systems are related to each other through the QR decomposition, as is
+illustrated in the image below.
+
+:c,image(JPG/uef_frames.jpg)
+
+During most molecular dynamics operations, the system is represented
+in the LAMMPS frame. Only when the positions and velocities are
+updated is the system rotated to the flow frame, and it is rotated
+back to the LAMMPS frame immediately afterwards. For this reason, all
+vector-valued quantities (except for the tensors from
+"compute pressure/uef"_compute_pressure_uef.html and
+"compute temp/uef"_compute_temp_uef.html) will be computed in the
+LAMMPS frame. Rotationally invariant scalar quantities like the
+temperature and hydrostatic pressure are frame-invariant and will be
+computed correctly. Additionally, the system is in the LAMMPS frame
+during all of the output steps, and therefore trajectory files made
+using the dump command will be in the LAMMPS frame unless the
+"dump cfg/uef"_dump_cfg_uef.html command is used.
+
+:line
+
+Temperature control is achieved with the default Nose-Hoover style
+thermostat documented in "fix npt"_fix_nh.html. When this fix is
+active, only the peculiar velocity of each atom is stored, defined as
+the velocity relative to the streaming velocity. This is in contrast
+to "fix nvt/sllod"_fix_nvt_sllod.html, which uses a lab-frame
+velocity, and removes the contribution from the streaming velocity in
+order to compute the temperature.
+
+Pressure control is achieved using the default Nose-Hoover barostat
+documented in "fix npt"_fix_nh.html. There are two ways to control the
+pressure using this fix. The first method involves using the {ext}
+keyword along with the {iso} pressure style. With this method, the
+pressure is controlled by scaling the simulation box isotropically to
+achieve the average pressure only in the directions specified by
+{ext}.  For example, if the {ext} value is set to {xy}, the average
+pressure (Pxx+Pyy)/2 will be controlled.
+
+This example command will control the total hydrostatic pressure under
+uniaxial tension:
+
+fix f1 all npt/uef temp 0.7 0.7 0.5 iso 1 1 5 erate -0.5 -0.5 ext xyz :pre
+
+This example command will control the average stress in compression
+directions, which would typically correspond to free surfaces under
+drawing with uniaxial tension:
+
+fix f2 all npt/uef temp 0.7 0.7 0.5 iso 1 1 5 erate -0.5 -0.5 ext xy :pre
+
+The second method for pressure control involves setting the normal
+stresses using the {x}, {y}, and/or {z} keywords. When using this
+method, the same pressure must be specified via {Pstart} and {Pstop}
+for all dimensions controlled. Any choice of pressure conditions that
+would cause LAMMPS to compute a deviatoric stress is not permissible
+and will result in an error. Additionally, all dimensions with
+controlled stress must have the same applied strain rate. The {ext}
+keyword must be set to the default value ({xyz}) when using this
+method.
+
+For example, the following commands will work:
+
+fix f3 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 y 1 1 5 erate -0.5 -0.5
+fix f4 all npt/uef temp 0.7 0.7 0.5 z 1 1 5 erate 0.5 0.5 :pre
+
+The following commands will not work:
+
+fix f5 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 z 1 1 5 erate -0.5 -0.5
+fix f6 all npt/uef temp 0.7 0.7 0.5 x 1 1 5 z 2 2 5 erate 0.5 0.5 :pre
+
+:line
+
+This fix computes a temperature and pressure each timestep.  To do
+this, it creates its own computes of style "temp/uef" and
+"pressure/uef", as if one of these two sets of commands had been
+issued:
+
+compute fix-ID_temp group-ID temp/uef
+compute fix-ID_press group-ID pressure/uef fix-ID_temp :pre
+
+compute fix-ID_temp all temp/uef
+compute fix-ID_press all pressure/uef fix-ID_temp :pre
+
+See the "compute temp/uef"_compute_temp_uef.html and "compute
+pressure/uef"_compute_pressure_uef.html commands for details.  Note
+that the IDs of the new computes are the fix-ID + underscore + "temp"
+or fix-ID + underscore + "press".
+
+[Restart, fix_modify, output, run start/stop, minimize info:]
+
+The fix writes the state of all the thermostat and barostat variables,
+as well as the cumulative strain applied, to "binary restart
+files"_restart.html.  See the "read_restart"_read_restart.html command
+for info on how to re-specify a fix in an input script that reads a
+restart file, so that the operation of the fix continues in an
+uninterrupted fashion.
+
+NOTE: It is not necessary to set the {strain} keyword when resuming a
+run from a restart file. Only for resuming from data files, which do
+not contain the cumulative applied strain, will this keyword be
+necessary.
+
+This fix can be used with the "fix_modify"_fix_modify.html {temp} and
+{press} options. The temperature and pressure computes used must be of
+type {temp/uef} and {pressure/uef}.
+
+This fix computes the same global scalar and vector quantities as "fix
+npt"_fix_nh.html.
+
+The fix is not invoked during "energy minimization"_minimize.html.
+
+[Restrictions:]
+
+This fix is part of the USER-UEF package. It is only enabled if LAMMPS
+was built with that package. See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+Due to requirements of the boundary conditions, when the {strain}
+keyword is set to zero (or unset), the initial simulation box must be
+cubic and have style triclinic. If the box is initially of type ortho,
+use "change_box"_change_box.html before invoking the fix.
+
+NOTE: When resuming from restart files, you may need to use "box tilt
+large"_box.html since LAMMPS has internal criteria from lattice
+reduction that are not the same as the criteria in the numerical
+lattice reduction algorithm.
+
+[Related commands:]
+
+"fix nvt"_fix_nh.html, "fix nvt/sllod"_fix_nvt_sllod.html, "compute
+temp/uef"_compute_temp_uef.html, "compute
+pressure/uef"_compute_pressure_uef.html, "dump
+cfg/uef"_dump_cfg_uef.html
+
+[Default:]
+
+The default keyword values specific to this fix are ext = xyz, strain
+= 0 0.  The remaining defaults are the same as for "fix
+npt"_fix_nh.html except tchain = 1.  The reason for this change is
+given in "fix nvt/sllod"_fix_nvt_sllod.html.
+
+:line
+
+:link(Dobson)
+[(Dobson)] Dobson, J Chem Phys, 141, 184103 (2014).
+
+:link(Hunt)
+[(Hunt)] Hunt, Mol Simul, 42, 347 (2016).
+
+:link(Semaev)
+[(Semaev)] Semaev, Cryptography and Lattices, 181 (2001).
+
+:link(Sllod)
+[(Evans and Morriss)] Evans and Morriss, Phys Rev A, 30, 1528 (1984).
+
+:link(Nicholson)
+[(Nicholson and Rutledge)] Nicholson and Rutledge, J Chem Phys, 145,
+244903 (2016).
diff --git a/doc/src/fix_rhok.txt b/doc/src/fix_rhok.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2db920ac4bef02c60f95604beb5e6feeb9cb7de6
--- /dev/null
+++ b/doc/src/fix_rhok.txt
@@ -0,0 +1,56 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix rhok command :h3
+
+fix ID group-ID rhok nx ny nz K a :pre
+
+ID, group-ID are documented in "fix"_fix.html command
+nx, ny, nz = k-vector of collective density field
+K = spring constant of bias potential
+a = anchor point of bias potential :ul
+
+[Examples:]
+
+fix bias all rhok 16 0 0 4.0 16.0
+fix 1 all npt temp 0.8 0.8 4.0 z 2.2 2.2 8.0
+# output of 4 values from fix rhok: U_bias rho_k_RE  rho_k_IM  |rho_k|
+thermo_style custom step temp pzz lz f_bias f_bias\[1\] f_bias\[2\] f_bias\[3\] :pre
+
+[Description:]
+
+The fix applies a force to atoms given by the potential
+
+:c,image(Eqs/fix_rhok.jpg)
+
+as described in "(Pedersen)"_#Pedersen.
+
+This field, which biases configurations with long-range order, can be
+used to study crystal-liquid interfaces and determine melting
+temperatures "(Pedersen)"_#Pedersen.
+
+An example of using the interface pinning method is located in the
+{examples/USER/misc/rhok} directory.
+
+[Restrictions:]
+
+This fix is part of the USER-MISC package.  It is only enabled if LAMMPS
+was built with that package.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+[Related commands:]
+
+"thermo_style"_thermo_style.html
+
+[Default:] none
+
+:line
+
+:link(Pedersen)
+[(Pedersen)] Pedersen, J. Chem. Phys., 139, 104102 (2013).
+
diff --git a/doc/src/fix_rigid.txt b/doc/src/fix_rigid.txt
index 99105e13507249636d2c0b5118450cf9d11da040..a5a631bd382e851c60cda8bfbe17c51eb0b73d18 100644
--- a/doc/src/fix_rigid.txt
+++ b/doc/src/fix_rigid.txt
@@ -26,6 +26,9 @@ style = {rigid} or {rigid/nve} or {rigid/nvt} or {rigid/npt} or {rigid/nph} or {
 bodystyle = {single} or {molecule} or {group} :l
   {single} args = none
   {molecule} args = none
+  {custom} args = {i_propname} or {v_varname}
+    i_propname = an integer property defined via fix property/atom
+    v_varname  = an atom-style or atomfile-style variable 
   {group} args = N groupID1 groupID2 ...
     N = # of groups
     groupID1, groupID2, ... = list of N group IDs :pre
@@ -80,6 +83,16 @@ fix 1 rods rigid/npt molecule temp 300.0 300.0 100.0 iso 0.5 0.5 10.0
 fix 1 particles rigid/npt molecule temp 1.0 1.0 5.0 x 0.5 0.5 1.0 z 0.5 0.5 1.0 couple xz
 fix 1 water rigid/nph molecule iso 0.5 0.5 1.0
 fix 1 particles rigid/npt/small molecule temp 1.0 1.0 1.0 iso 0.5 0.5 1.0 :pre
+
+variable bodyid atom 1.0*gmask(clump1)+2.0*gmask(clump2)+3.0*gmask(clump3)
+fix 1 clump rigid custom v_bodyid :pre
+
+variable bodyid atomfile bodies.txt
+fix 1 clump rigid custom v_bodyid :pre
+
+fix 0 all property/atom i_bodyid
+read_data data.rigid fix 0 NULL Bodies
+fix 1 clump rigid/small custom i_bodyid :pre
 	
 [Description:]
 
@@ -100,7 +113,7 @@ of a biomolecule such as a protein.
 
 Example of small rigid bodies are patchy nanoparticles, such as those
 modeled in "this paper"_#Zhang1 by Sharon Glotzer's group, clumps of
-granular particles, lipid molecules consiting of one or more point
+granular particles, lipid molecules consisting of one or more point
 dipoles connected to other spheroids or ellipsoids, irregular
 particles built from line segments (2d) or triangles (3d), and
 coarse-grain models of nano or colloidal particles consisting of a
@@ -203,11 +216,11 @@ most one rigid body.  Which atoms are in which bodies can be defined
 via several options.
 
 NOTE: With the {rigid/small} styles, which require that {bodystyle} be
-specified as {molecule}, you can define a system that has no rigid
-bodies initially.  This is useful when you are using the {mol} keyword
-in conjunction with another fix that is adding rigid bodies on-the-fly
-as molecules, such as "fix deposit"_fix_deposit.html or "fix
-pour"_fix_pour.html.
+specified as {molecule} or {custom}, you can define a system that has
+no rigid bodies initially.  This is useful when you are using the {mol}
+keyword in conjunction with another fix that is adding rigid bodies
+on-the-fly as molecules, such as "fix deposit"_fix_deposit.html or
+"fix pour"_fix_pour.html.
 
 For bodystyle {single} the entire fix group of atoms is treated as one
 rigid body.  This option is only allowed for the {rigid} styles.
@@ -222,6 +235,15 @@ molecule ID = 0) surrounding rigid bodies, this may not be what you
 want.  Thus you should be careful to use a fix group that only
 includes atoms you want to be part of rigid bodies.
 
+Bodystyle {custom} is similar to bodystyle {molecule}, however some
+custom properties are used to group atoms into rigid bodies. The
+special case for molecule/body ID = 0 is not available. Possible
+custom properties are an integer property associated with atoms through
+"fix property/atom"_fix_property_atom.html or an atom style variable
+or an atomfile style variable. For the latter two, the variable value
+will be rounded to an integer and then rigid bodies defined from
+those values.
+
 For bodystyle {group}, each of the listed groups is treated as a
 separate rigid body.  Only atoms that are also in the fix group are
 included in each rigid body.  This option is only allowed for the
diff --git a/doc/src/fixes.txt b/doc/src/fixes.txt
index 7000a66c51dc3df60b1c9701fa77e57c5768fd22..ad3e95fa4170422c0472e541c98adc6dd331b285 100644
--- a/doc/src/fixes.txt
+++ b/doc/src/fixes.txt
@@ -59,6 +59,7 @@ Fixes :h1
    fix_langevin
    fix_langevin_drude
    fix_langevin_eff
+   fix_latte
    fix_lb_fluid
    fix_lb_momentum
    fix_lb_pc
@@ -76,6 +77,7 @@ Fixes :h1
    fix_neb
    fix_nh
    fix_nh_eff
+   fix_nh_uef
    fix_nph_asphere
    fix_nph_body
    fix_nph_sphere
@@ -124,6 +126,7 @@ Fixes :h1
    fix_reaxc_species
    fix_recenter
    fix_restrain
+   fix_rhok
    fix_rigid
    fix_rx
    fix_saed_vtk
diff --git a/doc/src/lammps.book b/doc/src/lammps.book
index 86dfe78af35b27f8c58ba50da4fd5b280fe1a4c2..0691f43e9b1c120df66b9e8ba372e33f9019eb80 100644
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@@ -62,6 +62,7 @@ dump_modify.html
 dump_molfile.html
 dump_netcdf.html
 dump_vtk.html
+dump_cfg_uef.html
 echo.html
 fix.html
 fix_modify.html
@@ -187,6 +188,7 @@ fix_ipi.html
 fix_langevin.html
 fix_langevin_drude.html
 fix_langevin_eff.html
+fix_latte.html
 fix_lb_fluid.html
 fix_lb_momentum.html
 fix_lb_pc.html
@@ -231,6 +233,7 @@ fix_nvt_manifold_rattle.html
 fix_nvt_sllod.html
 fix_nvt_sllod_eff.html
 fix_nvt_sphere.html
+fix_nh_uef.html
 fix_oneway.html
 fix_orient.html
 fix_phonon.html
@@ -253,6 +256,7 @@ fix_reaxc_species.html
 fix_recenter.html
 fix_restrain.html
 fix_rigid.html
+fix_rhok.html
 fix_rx.html
 fix_saed_vtk.html
 fix_setforce.html
@@ -354,6 +358,7 @@ compute_pe.html
 compute_pe_atom.html
 compute_plasticity_atom.html
 compute_pressure.html
+compute_pressure_uef.html
 compute_property_atom.html
 compute_property_chunk.html
 compute_property_local.html
@@ -403,6 +408,7 @@ compute_temp_region.html
 compute_temp_region_eff.html
 compute_temp_rotate.html
 compute_temp_sphere.html
+compute_temp_uef.html
 compute_ti.html
 compute_torque_chunk.html
 compute_vacf.html
@@ -514,7 +520,7 @@ pair_zero.html
 bond_class2.html
 bond_fene.html
 bond_fene_expand.html
-bond_oxdna.html
+bond_gromos.html
 bond_harmonic.html
 bond_harmonic_shift.html
 bond_harmonic_shift_cut.html
@@ -522,6 +528,7 @@ bond_hybrid.html
 bond_morse.html
 bond_none.html
 bond_nonlinear.html
+bond_oxdna.html
 bond_quartic.html
 bond_table.html
 bond_zero.html
diff --git a/doc/src/package.txt b/doc/src/package.txt
index 58f6a5e34db2ccf9ee89c3b779742b6f8e7559aa..5c698934e857bfa31bdceac69bb3296ff76961c3 100644
--- a/doc/src/package.txt
+++ b/doc/src/package.txt
@@ -62,7 +62,7 @@ args = arguments specific to the style :l
       {no_affinity} values = none
   {kokkos} args = keyword value ...
     zero or more keyword/value pairs may be appended
-    keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward}
+    keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse}
       {neigh} value = {full} or {half}
         full = full neighbor list
         half = half neighbor list built in thread-safe manner
@@ -75,9 +75,10 @@ args = arguments specific to the style :l
       {binsize} value = size
         size = bin size for neighbor list construction (distance units)
       {comm} value = {no} or {host} or {device}
-        use value for both comm/exchange and comm/forward
+        use value for comm/exchange and comm/forward and comm/reverse
       {comm/exchange} value = {no} or {host} or {device}
       {comm/forward} value = {no} or {host} or {device}
+      {comm/reverse} value = {no} or {host} or {device}
         no = perform communication pack/unpack in non-KOKKOS mode
         host = perform pack/unpack on host (e.g. with OpenMP threading)
         device = perform pack/unpack on device (e.g. on GPU)
@@ -429,17 +430,18 @@ Coulombic solver"_kspace_style.html because the GPU is faster at
 performing pairwise interactions, then this rule of thumb may give too
 large a binsize.
 
-The {comm} and {comm/exchange} and {comm/forward} keywords determine
+The {comm} and {comm/exchange} and {comm/forward} and {comm/reverse} keywords determine
 whether the host or device performs the packing and unpacking of data
 when communicating per-atom data between processors.  "Exchange"
 communication happens only on timesteps that neighbor lists are
 rebuilt.  The data is only for atoms that migrate to new processors.
-"Forward" communication happens every timestep.  The data is for atom
+"Forward" communication happens every timestep. "Reverse" communication
+happens every timestep if the {newton} option is on.  The data is for atom
 coordinates and any other atom properties that needs to be updated for
 ghost atoms owned by each processor.
 
 The {comm} keyword is simply a short-cut to set the same value
-for both the {comm/exchange} and {comm/forward} keywords.
+for the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
 
 The value options for all 3 keywords are {no} or {host} or {device}.
 A value of {no} means to use the standard non-KOKKOS method of
diff --git a/doc/src/pair_born.txt b/doc/src/pair_born.txt
index a3cc744a2203cd92ae8314dca242a0e76f70d8cb..a016f77fa3ba6c96ef18fbb7d1094e79a0475aab 100644
--- a/doc/src/pair_born.txt
+++ b/doc/src/pair_born.txt
@@ -17,6 +17,7 @@ pair_style born/coul/long/omp command :h3
 pair_style born/coul/msm command :h3
 pair_style born/coul/msm/omp command :h3
 pair_style born/coul/wolf command :h3
+pair_style born/coul/wolf/cs command :h3
 pair_style born/coul/wolf/gpu command :h3
 pair_style born/coul/wolf/omp command :h3
 pair_style born/coul/dsf command :h3
@@ -36,7 +37,7 @@ args = list of arguments for a particular style :ul
   {born/coul/msm} args = cutoff (cutoff2)
     cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units)
     cutoff2 = global cutoff for Coulombic (optional) (distance units)
-  {born/coul/wolf} args = alpha cutoff (cutoff2)
+  {born/coul/wolf} or {born/coul/wolf/cs} args = alpha cutoff (cutoff2)
     alpha = damping parameter (inverse distance units)
     cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units)
     cutoff2 = global cutoff for Coulombic (optional) (distance units)
@@ -65,6 +66,7 @@ pair_coeff 1 1 6.08 0.317 2.340 24.18 11.51 :pre
 
 pair_style born/coul/wolf 0.25 10.0
 pair_style born/coul/wolf 0.25 10.0 9.0
+pair_style born/coul/wolf/cs 0.25 10.0 9.0
 pair_coeff * * 6.08 0.317 2.340 24.18 11.51
 pair_coeff 1 1 6.08 0.317 2.340 24.18 11.51 :pre
 
@@ -106,8 +108,9 @@ damped shifted force model as in the "coul/dsf"_pair_coul.html style.
 Style {born/coul/long/cs} is identical to {born/coul/long} except that
 a term is added for the "core/shell model"_Section_howto.html#howto_25
 to allow charges on core and shell particles to be separated by r =
-0.0. The same correction is introduced for {born/coul/dsf/cs} style
-which is identical to {born/coul/dsf}.
+0.0.  The same correction is introduced for the {born/coul/dsf/cs}
+style, which is otherwise identical to {born/coul/dsf}, and likewise for
+the {born/coul/wolf/cs} style, which is otherwise identical to {born/coul/wolf}.
 
 Note that these potentials are related to the "Buckingham
 potential"_pair_buck.html.
diff --git a/doc/src/pair_coul.txt b/doc/src/pair_coul.txt
index 29e5beed3c755f9e85b2b5d49f3fcbcdd4095470..4cca5ee0d78a529162e782121855150daa162e06 100644
--- a/doc/src/pair_coul.txt
+++ b/doc/src/pair_coul.txt
@@ -29,6 +29,7 @@ pair_style coul/streitz command :h3
 pair_style coul/wolf command :h3
 pair_style coul/wolf/kk command :h3
 pair_style coul/wolf/omp command :h3
+pair_style coul/wolf/cs command :h3
 pair_style tip4p/cut command :h3
 pair_style tip4p/long command :h3
 pair_style tip4p/cut/omp command :h3
@@ -43,6 +44,7 @@ pair_style coul/long cutoff
 pair_style coul/long/cs cutoff
 pair_style coul/long/gpu cutoff
 pair_style coul/wolf alpha cutoff
+pair_style coul/wolf/cs alpha cutoff
 pair_style coul/streitz cutoff keyword alpha
 pair_style tip4p/cut otype htype btype atype qdist cutoff
 pair_style tip4p/long otype htype btype atype qdist cutoff :pre
@@ -72,6 +74,7 @@ pair_style coul/msm 10.0
 pair_coeff * * :pre
 
 pair_style coul/wolf 0.2 9.0
+pair_style coul/wolf/cs 0.2 9.0
 pair_coeff * * :pre
 
 pair_style coul/streitz 12.0 ewald
@@ -202,7 +205,9 @@ interactions outside that distance are computed in reciprocal space.
 
 Style {coul/long/cs} is identical to {coul/long} except that a term is
 added for the "core/shell model"_Section_howto.html#howto_25 to allow
-charges on core and shell particles to be separated by r = 0.0.
+charges on core and shell particles to be separated by r = 0.0.  The
+same correction is introduced for the {coul/wolf/cs} style, which is
+otherwise identical to {coul/wolf}.
 
 Styles {tip4p/cut} and {tip4p/long} implement the coulomb part of
 the TIP4P water model of "(Jorgensen)"_#Jorgensen3, which introduces
diff --git a/doc/src/pair_cs.txt b/doc/src/pair_cs.txt
index 64dfc20d4748e774b51051dba65cd25953f04550..c1084c608797863a3ecc7528cdb457298d8c3512 100644
--- a/doc/src/pair_cs.txt
+++ b/doc/src/pair_cs.txt
@@ -9,12 +9,13 @@
 pair_style born/coul/long/cs command :h3
 pair_style buck/coul/long/cs command :h3
 pair_style born/coul/dsf/cs command :h3
+pair_style born/coul/wolf/cs command :h3
 
 [Syntax:]
 
 pair_style style args :pre
 
-style = {born/coul/long/cs} or {buck/coul/long/cs} or {born/coul/dsf/cs}
+style = {born/coul/long/cs} or {buck/coul/long/cs} or {born/coul/dsf/cs} or {born/coul/wolf/cs}
 args = list of arguments for a particular style :ul
   {born/coul/long/cs} args = cutoff (cutoff2)
     cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units)
@@ -26,6 +27,10 @@ args = list of arguments for a particular style :ul
     alpha = damping parameter (inverse distance units)
     cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units)
     cutoff2 = global cutoff for Coulombic (distance units) :pre
+  {born/coul/wolf/cs} args = alpha cutoff (cutoff2)
+    alpha = damping parameter (inverse distance units)
+    cutoff = global cutoff for non-Coulombic (and Coulombic if only 1 arg) (distance units)
+    cutoff2 = global cutoff for Coulombic (optional) (distance units)
 
 [Examples:]
 
@@ -41,6 +46,10 @@ pair_style born/coul/dsf/cs 0.1 10.0 12.0
 pair_coeff * *   0.0 1.00 0.00 0.00 0.00
 pair_coeff 1 1 480.0 0.25 0.00 1.05 0.50 :pre
 
+pair_style born/coul/wolf/cs 0.25 10.0 12.0
+pair_coeff * *   0.0 1.00 0.00 0.00 0.00
+pair_coeff 1 1 480.0 0.25 0.00 1.05 0.50 :pre
+
 [Description:]
 
 These pair styles are designed to be used with the adiabatic
@@ -73,13 +82,21 @@ the core and shell, epsilon is the dielectric constant and r_min is the
 minimal distance.
 
 The pair style {born/coul/dsf/cs} is identical to the
-"pair_style born/coul/dsf"_pair_born.html style, which uses the
+"pair_style born/coul/dsf"_pair_born.html style, which uses
 the damped shifted force model as in "coul/dsf"_pair_coul.html
 to compute the Coulomb contribution. This approach does not require
 a long-range solver, thus the only correction is the addition of a
 minimal distance to avoid the possible r = 0.0 case for a
 core/shell pair.
 
+The pair style {born/coul/wolf/cs} is identical to the
+"pair_style born/coul/wolf"_pair_born.html style, which uses
+the Wolf summation as in "coul/wolf"_pair_coul.html to compute
+the Coulomb contribution. This approach does not require
+a long-range solver, thus the only correction is the addition of a
+minimal distance to avoid the possible r = 0.0 case for a
+core/shell pair.
+
 [Restrictions:]
 
 These pair styles are part of the CORESHELL package.  They are only
diff --git a/doc/src/pair_dpd.txt b/doc/src/pair_dpd.txt
index 8d194bb0920999dadfaf7673619369c0550e68d5..9e29e93430602fd485bef467602d7001c8534fab 100644
--- a/doc/src/pair_dpd.txt
+++ b/doc/src/pair_dpd.txt
@@ -8,6 +8,7 @@
 
 pair_style dpd command :h3
 pair_style dpd/gpu command :h3
+pair_style dpd/intel command :h3
 pair_style dpd/omp command :h3
 pair_style dpd/tstat command :h3
 pair_style dpd/tstat/gpu command :h3
diff --git a/doc/src/pair_eam.txt b/doc/src/pair_eam.txt
index a0026432ec6cd52255ca53f8a084c8f10b60454f..03e77f53ab0f291be3054b1464d1936b057e95e6 100644
--- a/doc/src/pair_eam.txt
+++ b/doc/src/pair_eam.txt
@@ -294,7 +294,7 @@ distribution have a ".cdeam" suffix.
 
 Style {eam/fs} computes pairwise interactions for metals and metal
 alloys using a generalized form of EAM potentials due to Finnis and
-Sinclair "(Finnis)"_#Finnis.  The total energy Ei of an atom I is
+Sinclair "(Finnis)"_#Finnis1.  The total energy Ei of an atom I is
 given by
 
 :c,image(Eqs/pair_eam_fs.jpg)
@@ -442,7 +442,7 @@ of Physics: Condensed Matter, 16, S2629 (2004).
 [(Daw)] Daw, Baskes, Phys Rev Lett, 50, 1285 (1983).
 Daw, Baskes, Phys Rev B, 29, 6443 (1984).
 
-:link(Finnis)
+:link(Finnis1)
 [(Finnis)] Finnis, Sinclair, Philosophical Magazine A, 50, 45 (1984).
 
 :link(Stukowski)
diff --git a/examples/USER/misc/rhok/README.md b/examples/USER/misc/rhok/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4e011255fcc8e7628379451a94ee30db8ec867a3
--- /dev/null
+++ b/examples/USER/misc/rhok/README.md
@@ -0,0 +1,74 @@
+# The Interface Pinning method for studying solid-liquid transitions
+
+In this example we will use the interface pinning method to study a solid-liquid transition.
+This is done by adding a harmonic potential to the Hamiltonian
+that biases the system towards two-phase configurations:
+
+  U_bias = 0.5*K*(Q-a)^2
+
+The bias field couples to an order parameter of crystallinity, Q. The implementation uses long-range order:
+
+  Q=|rho_k|, 
+
+where rho_k is the collective density field of the wave-vector k. 
+For future reference we note that the structure factor S(k) is given by the variance of the collective density field: 
+
+  S(k)=|rho_k|^2.
+
+### About the method
+
+It is recommended to get familiar with the interface pinning method by reading:
+
+  [Ulf R. Pedersen, JCP 139, 104102 (2013)](http://dx.doi.org/10.1063/1.4818747)
+
+A detailed bibliography is provided at
+
+  <http://urp.dk/interface_pinning.htm>
+
+and a brief introduction can be found on YouTube:
+
+  [![Interface Pinning](http://img.youtube.com/vi/F_79JZNdyoQ/0.jpg)](http://www.youtube.com/watch?v=F_79JZNdyoQ)
+
+### Implementation in LAMMPS
+
+For this example we will be using the rhok fix.
+
+   fix [name] [groupID] rhok [nx] [ny] [nz] [K] [a]
+
+This fix adds a harmonic bias potential U_bias=0.5*K*(|rho_k|-a)^2 to the force calculation.
+The elements of the wave-vector k are given by the nx, ny and nz input:
+
+  k_x = (2 pi / L_x) * n_x, k_y = (2 pi / L_y) * n_y and k_z = (2 pi / L_z) * n_z. 
+
+We will use a k vector that corresponds to a Bragg peak.
+
+## Example: the Lennard-Jones (LJ) model
+
+We will use the interface pinning method to study melting of the LJ model
+at temperature 0.8 and pressure 2.185. This is a coexistence state-point, and the method
+can be used to show this. The present directory contains the input files that we will use:
+
+  in.crystal
+  in.setup
+  in.pinning
+
+1. First we will determine the density of the crystal with the LAMMPS input file in.crystal.
+  From the output we get that the average density after equilibration is 0.9731. 
+  We need this density to ensure hydrostatic pressure in a two-phase simulation.
+
+2. Next, we set up a two-phase configuration using in.setup.
+
+3. Finally, we run a two-phase simulation with the bias-field applied using in.pinning.
+  The last column in the output shows |rho_k|. We note that after an equilibration period
+  the value fluctuates around the anchor point (a) -- showing that this is indeed a coexistence
+  state point.
+
+The reference [JCP 139, 104102 (2013)](http://dx.doi.org/10.1063/1.4818747) gives details on using the method to find coexistence state points,
+and the reference [JCP 142, 044104 (2015)](http://dx.doi.org/10.1063/1.4905955) shows how the crystal growth rate can be computed from fluctuations.
+That method has been found to be most effective in the slightly super-heated regime above the melting temperature.
+
+## Contact
+
+  Ulf R. Pedersen;
+  <http://www.urp.dk>;
+  ulf AT urp.dk
diff --git a/examples/USER/misc/rhok/in.crystal b/examples/USER/misc/rhok/in.crystal
new file mode 100644
index 0000000000000000000000000000000000000000..55e9e59a064ad93165d3a5f35ca5661b5e1be3c2
--- /dev/null
+++ b/examples/USER/misc/rhok/in.crystal
@@ -0,0 +1,36 @@
+units		lj
+dimension	3
+boundary	p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style	lj/cut 2.5
+pair_modify	shift yes
+lattice	fcc 0.9731
+region	my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+pair_coeff 1 1 1.0 1.0 2.5
+pair_modify tail no
+pair_modify shift yes
+mass 1 1.0
+velocity all create 1.6 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32
+
+# computing long-range order (no bias is added since the spring constant K=0)
+fix bias all rhok 16 0 0 0.0 0.0
+
+# output
+thermo 50
+thermo_style custom step temp press density f_bias[3]
+# dump dumpXYZ all xyz 2000 traj.xyz
+
+# NOTE: this is cut short to 5000 steps for demonstration purposes
+# run 100000
+run 5000
diff --git a/examples/USER/misc/rhok/in.pinning b/examples/USER/misc/rhok/in.pinning
new file mode 100644
index 0000000000000000000000000000000000000000..0c220f480b7ca8586c29b96b0455dd5002efda4d
--- /dev/null
+++ b/examples/USER/misc/rhok/in.pinning
@@ -0,0 +1,33 @@
+units       lj
+dimension   3
+boundary    p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style  lj/cut 2.5
+pair_modify shift yes
+read_data   data.halfhalf
+pair_coeff  1 1 1.0 1.0 2.5
+mass        1 1.0
+
+# simulation parameters 
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+
+velocity all create 0.8 1 mom yes rot yes
+fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0
+fix 100 all momentum 100 linear 1 1 1
+
+# harmonic rho_k bias-field 
+#                 nx ny nz K     a
+fix bias all rhok 16 0  0  4.0   26.00
+
+# output                                U_bias rho_k_RE  rho_k_IM |rho_k| 
+thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3]
+thermo 50
+# dump dumpXYZ all xyz 500 traj.xyz
+
+# NOTE: run reduced for demonstration purposes
+# run 50000
+run 5000
diff --git a/examples/USER/misc/rhok/in.setup b/examples/USER/misc/rhok/in.setup
new file mode 100644
index 0000000000000000000000000000000000000000..649b0f534c8fc8821ec301f2addac3568e3a120e
--- /dev/null
+++ b/examples/USER/misc/rhok/in.setup
@@ -0,0 +1,41 @@
+units lj
+dimension 3
+boundary p p p
+atom_style atomic
+
+# truncated and shifted LJ potential
+pair_style lj/cut 2.5
+pair_modify shift yes
+
+# fcc lattice
+lattice	fcc 0.9731
+region my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+pair_coeff 1 1 1.0 1.0 2.5
+mass 1 1.0
+change_box all z final 0.0 34 remap units box
+
+# select particles on one side of the elongated box
+region left plane 0 0 10 0 0 1
+group left region left
+
+velocity left create 6.0 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble left nve     # Note: only move particles in the left-hand side
+fix langevin left langevin 3.0 0.8 100.0 2017
+
+# output
+thermo_style custom step temp pzz pe lz
+thermo 100
+# dump dumpXYZ all xyz 100 traj.xyz
+
+# run reduced for demonstration purposes
+# run 10000
+run 5000
+write_data data.halfhalf
diff --git a/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..05fadb5c032d51bca8064d40cb63a5948fa652ee
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.1
@@ -0,0 +1,187 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units		lj
+dimension	3
+boundary	p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style	lj/cut 2.5
+pair_modify	shift yes
+lattice	fcc 0.9731
+Lattice spacing in x,y,z = 1.6019 1.6019 1.6019
+region	my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379)
+  1 by 1 by 1 MPI processor grid
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+Created 5120 atoms
+pair_coeff 1 1 1.0 1.0 2.5
+pair_modify tail no
+pair_modify shift yes
+mass 1 1.0
+velocity all create 1.6 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32
+
+# computing long-range order (no bias is added since k=0)
+fix bias all rhok 16 0 0 0.0 0.0
+
+# output
+thermo 50
+thermo_style custom step temp press density f_bias[3]
+# dump dumpXYZ all xyz 2000 traj.xyz
+
+# NOTE: this is cut short to 5000 steps for demonstration purposes
+# run 100000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 21
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.523 | 4.523 | 4.523 Mbytes
+Step Temp Press Density f_bias[3] 
+       0          1.6   -2.7568106       0.9731    71.554175 
+      50   0.78457786    3.1029192   0.97362639    54.327705 
+     100   0.85528971    2.4670259   0.97213457    55.189308 
+     150   0.85241818    2.3210306    0.9698027    56.138125 
+     200   0.82301385    2.3448692   0.96708227    55.735326 
+     250   0.83076383    2.0890816   0.96425763    55.320625 
+     300   0.81602823    2.0118796   0.96173925    54.095736 
+     350   0.81084006    1.9122192   0.95979392    54.526429 
+     400   0.80776593    1.8502174   0.95869117    54.434901 
+     450   0.80694697    1.8435873   0.95851085     53.20809 
+     500   0.81384248    1.8111331   0.95917305    53.419395 
+     550   0.81027072    1.9222272   0.96056019     54.36723 
+     600   0.81199582    2.0291945   0.96248486    54.888582 
+     650   0.82507964    2.0706462   0.96467227    55.807137 
+     700     0.832562    2.1471442    0.9668913    56.721267 
+     750   0.83358138    2.2674672     0.968984    56.723838 
+     800   0.83477542    2.3658275   0.97072603    56.234689 
+     850   0.84722921    2.3506233   0.97189674    56.262424 
+     900   0.83526965    2.4532068   0.97248856    56.219103 
+     950   0.83174583    2.4763958   0.97249527    56.409813 
+    1000   0.83022557    2.4334341   0.97194093    55.890858 
+    1050   0.83208978    2.3478416   0.97092452    54.934691 
+    1100   0.82789545     2.272404    0.9696152     54.90894 
+    1150   0.82678617    2.1798046   0.96819776    54.927782 
+    1200    0.8088841    2.1960256   0.96687735    54.914327 
+    1250   0.81512784    2.0736261   0.96579008    53.927291 
+    1300   0.81271067    2.0297138   0.96504188    54.289698 
+    1350    0.8201767    1.9493976   0.96464115    55.342131 
+    1400   0.80880489    2.0016987   0.96468463    55.757758 
+    1450    0.8114196    2.0282699   0.96514115    55.865676 
+    1500   0.81085664    2.0838361   0.96591869    56.553425 
+    1550   0.81257075    2.1283157   0.96694549    56.921544 
+    1600   0.82617645    2.1017986   0.96817075    56.858808 
+    1650   0.82616141    2.1885582   0.96941073    56.717917 
+    1700   0.81634174    2.2996967   0.97047447    56.453745 
+    1750   0.82447573    2.2924266   0.97128663    56.916813 
+    1800   0.83610432     2.236456   0.97178453    56.400752 
+    1850   0.82479203    2.3103493   0.97197318    55.891368 
+    1900   0.82298992    2.3059289   0.97181084    55.680563 
+    1950   0.82098556    2.2801003   0.97138609    55.754406 
+    2000    0.8181203    2.2480175   0.97078591    55.801363 
+    2050   0.82822293    2.1208884   0.97004107       55.687 
+    2100    0.7976818    2.2711199   0.96930169    55.459844 
+    2150   0.81817848    2.0680351   0.96860201    56.514731 
+    2200   0.80707457    2.1112141   0.96810519    55.504308 
+    2250   0.81651111    2.0077603   0.96781161    55.635702 
+    2300   0.80634534    2.0662241   0.96777177    56.051086 
+    2350   0.80892831    2.0619333   0.96799037    56.548711 
+    2400   0.82454203    1.9585394    0.9684672    56.695235 
+    2450   0.81517178     2.075283   0.96921622    56.613082 
+    2500   0.80969595    2.1624581   0.97010528     56.57516 
+    2550   0.80862964    2.2088622   0.97100774    57.072594 
+    2600   0.81468816    2.2293973   0.97192868    56.879212 
+    2650   0.82063107    2.2244887   0.97269715    55.454502 
+    2700   0.81691618    2.2789954   0.97319841    54.421943 
+    2750    0.8141787    2.2981247   0.97340453    54.469921 
+    2800   0.81973871    2.2422136    0.9733278    55.959235 
+    2850   0.82037399     2.201016   0.97302727    56.685826 
+    2900   0.80650164    2.2672955    0.9726128    56.574395 
+    2950   0.81752783    2.1317541   0.97207545    56.809412 
+    3000   0.80836945    2.1461483   0.97151192    57.205206 
+    3050   0.80785109    2.1189056   0.97103049    57.418763 
+    3100   0.79835058     2.146416   0.97069705    57.329383 
+    3150   0.79792089    2.1388267   0.97051679    57.279852 
+    3200   0.79934603    2.1049562   0.97046851    56.351494 
+    3250   0.79523232    2.1549779   0.97063956     56.00356 
+    3300    0.8004458    2.1145975   0.97096375    55.725509 
+    3350   0.79772742     2.166292   0.97143785    55.558075 
+    3400   0.80621087    2.1309217   0.97198456    55.816704 
+    3450   0.80540626    2.1727557   0.97263267    55.671283 
+    3500   0.80867606    2.1905129   0.97321538    55.390086 
+    3550   0.80917896    2.2144872   0.97370472    55.742085 
+    3600   0.80930722    2.2288938     0.974093     56.23064 
+    3650   0.80390523    2.2777327   0.97431886    56.084731 
+    3700   0.79620093    2.3143541   0.97435103    55.942797 
+    3750   0.80252393    2.2564638    0.9741875    56.042055 
+    3800   0.78981264    2.3156481    0.9739121    55.971352 
+    3850   0.80391951    2.1804938   0.97351088    55.855858 
+    3900   0.81268129    2.0855818   0.97308521    56.288315 
+    3950    0.7958182     2.175259   0.97273088    56.140141 
+    4000   0.80054484    2.1163279   0.97243129    56.366818 
+    4050   0.79760187     2.105362   0.97225308    56.684619 
+    4100   0.79283424    2.1357603     0.972206    56.203341 
+    4150   0.79543088    2.1036951   0.97227608    56.606315 
+    4200   0.79410999    2.1402049   0.97253758    56.277478 
+    4250    0.7985469    2.1285154   0.97293622    56.356076 
+    4300   0.79700387    2.1470614   0.97337091    56.722298 
+    4350   0.80479321    2.1403244   0.97384674    57.212574 
+    4400   0.79505512     2.224463   0.97434415    56.561877 
+    4450   0.78346648    2.3347865   0.97478611    56.681362 
+    4500   0.79811284     2.259123   0.97510069    57.365929 
+    4550   0.80015561    2.2345254   0.97523653     57.34799 
+    4600   0.79648318    2.2651869   0.97525975    57.502318 
+    4650   0.80524865    2.1943025   0.97507638    57.702488 
+    4700   0.80397778    2.1758629   0.97478268    57.162107 
+    4750   0.78914913    2.2470191    0.9744625    56.849565 
+    4800   0.79324889    2.2028993   0.97408817    57.572344 
+    4850   0.78993209     2.181763   0.97373372    57.683552 
+    4900   0.79041263    2.1604768   0.97348692    56.922312 
+    4950   0.79741332    2.1105901   0.97332545    57.488932 
+    5000    0.7891178     2.163416   0.97328963    57.365252 
+Loop time of 33.6467 on 1 procs for 5000 steps with 5120 atoms
+
+Performance: 51357.258 tau/day, 148.603 timesteps/s
+99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 24.699     | 24.699     | 24.699     |   0.0 | 73.41
+Neigh   | 2.8894     | 2.8894     | 2.8894     |   0.0 |  8.59
+Comm    | 0.34907    | 0.34907    | 0.34907    |   0.0 |  1.04
+Output  | 0.0056     | 0.0056     | 0.0056     |   0.0 |  0.02
+Modify  | 5.5718     | 5.5718     | 5.5718     |   0.0 | 16.56
+Other   |            | 0.1319     |            |       |  0.39
+
+Nlocal:    5120 ave 5120 max 5120 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    7594 ave 7594 max 7594 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    323081 ave 323081 max 323081 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 323081
+Ave neighs/atom = 63.1018
+Neighbor list builds = 248
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:33
diff --git a/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..cec9c69aff92e969c916400c4190e78160929e23
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.crystal.g++.4
@@ -0,0 +1,187 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units		lj
+dimension	3
+boundary	p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style	lj/cut 2.5
+pair_modify	shift yes
+lattice	fcc 0.9731
+Lattice spacing in x,y,z = 1.6019 1.6019 1.6019
+region	my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379)
+  1 by 1 by 4 MPI processor grid
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+Created 5120 atoms
+pair_coeff 1 1 1.0 1.0 2.5
+pair_modify tail no
+pair_modify shift yes
+mass 1 1.0
+velocity all create 1.6 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble all npt temp 0.8 0.8 4.0 aniso 2.185 2.185 8.0 pchain 32
+
+# computing long-range order (no bias is added since k=0)
+fix bias all rhok 16 0 0 0.0 0.0
+
+# output
+thermo 50
+thermo_style custom step temp press density f_bias[3]
+# dump dumpXYZ all xyz 2000 traj.xyz
+
+# NOTE: this is cut short to 5000 steps for demonstration purposes
+# run 100000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 21
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.23 | 3.23 | 3.23 Mbytes
+Step Temp Press Density f_bias[3] 
+       0          1.6   -2.7568106       0.9731    71.554175 
+      50   0.78457786    3.1029192   0.97362639    54.327705 
+     100   0.85528971    2.4670259   0.97213457    55.189308 
+     150   0.85241818    2.3210306    0.9698027    56.138125 
+     200   0.82301385    2.3448692   0.96708227    55.735326 
+     250   0.83076383    2.0890816   0.96425763    55.320625 
+     300   0.81602823    2.0118796   0.96173925    54.095736 
+     350   0.81084006    1.9122192   0.95979392    54.526429 
+     400   0.80776593    1.8502174   0.95869117    54.434901 
+     450   0.80694697    1.8435873   0.95851085     53.20809 
+     500   0.81384248    1.8111331   0.95917305    53.419395 
+     550   0.81027072    1.9222272   0.96056019     54.36723 
+     600   0.81199582    2.0291945   0.96248486    54.888582 
+     650   0.82507964    2.0706462   0.96467227    55.807137 
+     700     0.832562    2.1471442    0.9668913    56.721267 
+     750   0.83358138    2.2674672     0.968984    56.723838 
+     800   0.83477542    2.3658275   0.97072603    56.234689 
+     850   0.84722921    2.3506233   0.97189674    56.262424 
+     900   0.83526965    2.4532068   0.97248856    56.219103 
+     950   0.83174583    2.4763958   0.97249527    56.409813 
+    1000   0.83022557    2.4334341   0.97194093    55.890858 
+    1050   0.83208978    2.3478416   0.97092452    54.934691 
+    1100   0.82789545     2.272404    0.9696152     54.90894 
+    1150   0.82678617    2.1798046   0.96819776    54.927782 
+    1200    0.8088841    2.1960256   0.96687735    54.914327 
+    1250   0.81512784    2.0736261   0.96579008    53.927291 
+    1300   0.81271067    2.0297138   0.96504188    54.289698 
+    1350    0.8201767    1.9493976   0.96464115    55.342131 
+    1400   0.80880489    2.0016987   0.96468463    55.757758 
+    1450    0.8114196    2.0282699   0.96514115    55.865676 
+    1500   0.81085664    2.0838361   0.96591869    56.553425 
+    1550   0.81257075    2.1283157   0.96694549    56.921544 
+    1600   0.82617645    2.1017986   0.96817075    56.858808 
+    1650   0.82616141    2.1885582   0.96941073    56.717917 
+    1700   0.81634174    2.2996967   0.97047447    56.453745 
+    1750   0.82447573    2.2924266   0.97128663    56.916813 
+    1800   0.83610432     2.236456   0.97178453    56.400752 
+    1850     0.824792    2.3103491   0.97197318    55.891368 
+    1900   0.82298989    2.3059287   0.97181084    55.680562 
+    1950   0.82098545    2.2801009   0.97138609    55.754404 
+    2000   0.81812031    2.2480166   0.97078591    55.801371 
+    2050   0.82822262    2.1208887   0.97004108    55.687001 
+    2100   0.79768162    2.2711186    0.9693017    55.459852 
+    2150   0.81817874    2.0680317   0.96860202    56.514744 
+    2200   0.80707412    2.1112032   0.96810521    55.504308 
+    2250   0.81650921    2.0077757   0.96781164    55.635717 
+    2300   0.80634656     2.066186   0.96777181    56.051088 
+    2350   0.80893174    2.0619084   0.96799042    56.548711 
+    2400   0.82453783    1.9585503   0.96846727    56.695111 
+    2450   0.81517275    2.0752617   0.96921631    56.614046 
+    2500   0.80969622    2.1624476    0.9701054    56.574846 
+    2550   0.80861922    2.2089505   0.97100787    57.072334 
+    2600   0.81468888    2.2293754   0.97192875    56.879416 
+    2650   0.82061239    2.2245462   0.97269723    55.442015 
+    2700   0.81687473    2.2792015   0.97319852    54.420301 
+    2750   0.81416567    2.2982988   0.97340467    54.469427 
+    2800   0.81978563    2.2418723   0.97332803    55.965451 
+    2850   0.82069759    2.1988948   0.97302752    56.686807 
+    2900   0.80631184    2.2684466   0.97261407    56.585682 
+    2950   0.81759744    2.1312328   0.97207888    56.812431 
+    3000   0.80748056     2.152676   0.97151807    57.178849 
+    3050   0.80789237     2.118162   0.97103728    57.433724 
+    3100   0.79882523    2.1414744   0.97070338     57.34686 
+    3150   0.79803949    2.1359043   0.97052875    57.382544 
+    3200   0.79170386    2.1548392   0.97049349    56.465806 
+    3250   0.78848813    2.1990144   0.97067557    55.929088 
+    3300   0.79820555    2.1304609   0.97101444    55.624487 
+    3350   0.79250565    2.1971235   0.97149233    55.933615 
+    3400   0.80584844    2.1417239   0.97206083     55.85922 
+    3450   0.80685744    2.1640501   0.97266047    55.135963 
+    3500   0.80751888    2.1858277   0.97318703    55.407581 
+    3550   0.79882754    2.2796452   0.97363149    55.392366 
+    3600   0.80219171    2.2715765   0.97392571    55.867887 
+    3650   0.79061794    2.3492866   0.97410985      56.0192 
+    3700    0.8058483    2.2327904   0.97411924    56.491303 
+    3750   0.79460746    2.2941868   0.97397764    55.929912 
+    3800   0.80447478    2.2018009   0.97367627    55.663208 
+    3850   0.80355335      2.17638   0.97333164    55.637261 
+    3900   0.80388417    2.1531434    0.9729647     56.03794 
+    3950   0.79557409    2.1853318    0.9726503    56.132348 
+    4000   0.79547396    2.1457051   0.97235244    55.552675 
+    4050    0.8058384    2.0637678   0.97213346    56.185416 
+    4100    0.7976931    2.1028246   0.97208255    56.050347 
+    4150   0.79555522     2.115473   0.97216375    56.868136 
+    4200   0.79324134    2.1510383   0.97246129    56.462635 
+    4250   0.80788167    2.0534887   0.97287821    55.650788 
+    4300   0.79389865    2.2019815   0.97337765    55.596846 
+    4350   0.79786309    2.1851119   0.97389825    57.000921 
+    4400   0.79986518    2.1997541   0.97443778    57.551564 
+    4450    0.8063901    2.1893874   0.97493151    57.236138 
+    4500   0.80005802     2.250364   0.97533075    57.341358 
+    4550   0.79707443    2.2995576   0.97557554    57.338713 
+    4600   0.79869949    2.2807889   0.97563277    57.084504 
+    4650   0.79694427    2.2673215   0.97544638    57.025663 
+    4700   0.79023986    2.2884131   0.97511483    57.131188 
+    4750   0.79566823    2.2215519   0.97464304    57.045676 
+    4800   0.78936986    2.2268037   0.97410626    57.384178 
+    4850   0.79025913    2.1836718     0.973616     57.78438 
+    4900   0.80138424    2.0657609    0.9732124    57.888266 
+    4950   0.77853735     2.207944   0.97296347    57.312213 
+    5000   0.79115984    2.1035893   0.97285578    57.109472 
+Loop time of 9.53489 on 4 procs for 5000 steps with 5120 atoms
+
+Performance: 181229.223 tau/day, 524.390 timesteps/s
+99.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 6.312      | 6.4238     | 6.5139     |   3.1 | 67.37
+Neigh   | 0.72062    | 0.73538    | 0.74531    |   1.1 |  7.71
+Comm    | 0.52697    | 0.64152    | 0.78688    |  14.1 |  6.73
+Output  | 0.0028393  | 0.0029888  | 0.0033851  |   0.4 |  0.03
+Modify  | 1.6249     | 1.669      | 1.7253     |   2.9 | 17.50
+Other   |            | 0.06221    |            |       |  0.65
+
+Nlocal:    1280 ave 1289 max 1266 min
+Histogram: 1 0 0 0 0 1 0 0 0 2
+Nghost:    3346.25 ave 3379 max 3331 min
+Histogram: 1 2 0 0 0 0 0 0 0 1
+Neighs:    80701.8 ave 81534 max 79755 min
+Histogram: 1 0 1 0 0 0 0 0 1 1
+
+Total # of neighbors = 322807
+Ave neighs/atom = 63.0482
+Neighbor list builds = 248
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:09
diff --git a/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..c2aaa9a58118475f45c0c2887eec58ac8588a631
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.1
@@ -0,0 +1,186 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units       lj
+dimension   3
+boundary    p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style  lj/cut 2.5
+pair_modify shift yes
+read_data   data.halfhalf
+  orthogonal box = (0 0 0) to (12.8152 12.8152 34)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  5120 atoms
+  reading velocities ...
+  5120 velocities
+pair_coeff  1 1 1.0 1.0 2.5
+mass        1 1.0
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+
+velocity all create 0.8 1 mom yes rot yes
+fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0
+fix 100 all momentum 100 linear 1 1 1
+
+# harmonic rho_k bias-field
+#                 nx ny nz k     a
+fix bias all rhok 16 0  0  4.0   26.00
+
+# output                                U_bias rho_k_RE  rho_k_IM |rho_k|
+thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3]
+thermo 50
+dump dumpXYZ all xyz 500 traj.xyz
+
+# NOTE: run reduced for demonstration purposes
+# run 50000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 22
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 5.723 | 5.723 | 5.723 Mbytes
+Step Temp Pzz PotEng Lz f_bias f_bias[1] f_bias[2] f_bias[3] 
+       0          0.8    5.1566801   -4.8556711           34    179.52419    35.473155   -0.2832763    35.474286 
+      50     1.072533    3.8158392   -5.2704532    34.024206  0.010596224    25.927135 -0.063106738    25.927212 
+     100    1.1093231    3.6703116   -5.3380965    34.094814    1.8552612    26.958236  -0.51403326    26.963136 
+     150    1.1080721    3.8202601   -5.3568368    34.207473   0.39188605    26.420755   -1.0759575    26.442655 
+     200    1.1435287    3.3445987   -5.4365298    34.354119    3.0758718    27.239465  -0.19115251    27.240135 
+     250    1.1203046    3.4669456   -5.4293867    34.511473    0.8543814    26.652785  -0.20818214    26.653598 
+     300    1.1012709    3.4583154   -5.4281803    34.664509    2.4837156    27.097674    0.9518628    27.114387 
+     350    1.0439632    3.8953869    -5.368619    34.810399   0.55385719    26.518391   0.64525272     26.52624 
+     400    1.0083878    4.0523864   -5.3418278    34.957669   0.34806057    26.404011   0.83368604    26.417169 
+     450    0.9675244     4.310087   -5.3089468    35.114208    0.7921285    26.607512     1.077889    26.629336 
+     500   0.94605476    4.1050097   -5.3062273    35.284018   0.87757754    26.639125    1.1140858    26.662411 
+     550   0.92662323    3.9299063   -5.3062927    35.458565    1.3746441    26.773494    1.7256603    26.829049 
+     600   0.89723165    3.7683555    -5.289725    35.629881   0.46692943    26.372973    2.4135502    26.483182 
+     650   0.90612566    3.1098837   -5.3267851    35.788537  0.032662126    25.918784    3.2982102    26.127793 
+     700    0.9152508    2.6527976   -5.3597799    35.923343  0.014621588    25.834591    3.6093492    26.085503 
+     750   0.90156356    2.3474851   -5.3545938    36.031813   0.75225637    26.307204    4.0247215    26.613293 
+     800   0.89748513    1.9825103   -5.3610202    36.113888   0.33402511    26.261326    2.7858039    26.408672 
+     850   0.89496343    1.8906342   -5.3673514    36.169424   0.85431557    26.534648    2.5150347    26.653573 
+     900   0.89463983    1.5654217   -5.3753283     36.20181    1.5689239    26.764737    2.5474794    26.885699 
+     950   0.88663832    1.4399476   -5.3703322    36.209971  0.044436903    25.818418    1.2963356    25.850941 
+    1000   0.87407208     1.485718   -5.3572665    36.195386    1.4405611    26.828072    1.0520795    26.848693 
+    1050   0.87580489     1.163155   -5.3647269    36.160279   0.15319559    26.234791    1.4845964    26.276763 
+    1100   0.86978111    1.3743181   -5.3594907    36.104958    1.1313537     25.19895    1.5711793    25.247885 
+    1150   0.86987861    1.3212927   -5.3628503    36.035486  0.039865678    25.841762   0.93898962    25.858816 
+    1200   0.87142486    1.3293818   -5.3676854    35.954411   0.16827389     25.70952  -0.14639427    25.709936 
+    1250   0.87582265    1.3203803   -5.3764058     35.86575   0.25946652    25.639682  0.082696867    25.639815 
+    1300   0.87371627    1.4680294    -5.375151    35.772824   0.17697069    25.701417    0.2397926    25.702535 
+    1350   0.88617453    1.5923057   -5.3954912    35.681046 0.00049155526    25.973634  -0.74521794    25.984323 
+    1400   0.87809636    1.5821707   -5.3850722    35.594706  0.024050814    26.107395  -0.34393685     26.10966 
+    1450   0.87912192    1.7820174   -5.3885842    35.514722   0.20999914    25.667238  -0.66933655    25.675964 
+    1500   0.88293618    2.0295275   -5.3963602    35.443445   0.60232374    25.376395   -1.9501461    25.451218 
+    1550   0.90012542    1.9476472   -5.4249456    35.382791    0.4488038    26.448928   -1.1452474    26.473711 
+    1600   0.89155063    2.2462603   -5.4153432    35.332095  0.039621687    26.138157  -0.36825239    26.140751 
+    1650    0.8942624     2.343747   -5.4233433    35.294954 0.0089980332    26.064277   0.38189192    26.067075 
+    1700   0.90047841     2.451289   -5.4376312     35.27234   0.86985171    26.646438   0.83408084    26.659489 
+    1750   0.87586052    2.6381221   -5.4067182    35.264564     6.346204    27.652722    2.6699692     27.78132 
+    1800   0.87392582    2.6338176   -5.4109056    35.270073  0.046414129    26.016188    2.6651053    26.152339 
+    1850   0.86540415    2.5434301   -5.4058587    35.285902  0.054615472    26.074279    2.1799787    26.165251 
+    1900   0.87043082    2.5776772   -5.4216997    35.309062   0.68978148     26.38648    3.2614091    26.587274 
+    1950   0.86281992    2.3107762   -5.4188978    35.338501 0.0072672577    25.736893    3.2375012     25.93972 
+    2000   0.85905576    2.2894047   -5.4215995     35.36787  0.095633435    26.072085    2.7685848     26.21867 
+    2050   0.85793751    2.2382039   -5.4279351    35.395213   0.13602344    25.598457    2.6881027    25.739209 
+    2100   0.85585253    2.0765811   -5.4324511    35.418877 0.0059888115    25.754128    3.1436222    25.945279 
+    2150   0.86701057    1.8449875   -5.4562208    35.436124  0.097328618    25.413697    4.3268293      25.7794 
+    2200   0.85168154    1.9024923   -5.4395776     35.44246   0.20764576    25.094788    5.4406104    25.677784 
+    2250    0.8429719     1.870335   -5.4320586    35.438363   0.34419961    24.998478    5.4475709    25.585151 
+    2300   0.84176891    1.7100228   -5.4351472    35.422863   0.76036958    24.697018    5.8629967    25.383409 
+    2350   0.84601588    1.8539039   -5.4456629    35.395979   0.38437531    25.647986    6.4163366    26.438392 
+    2400   0.84637647    1.6299091   -5.4498948     35.36125  0.074236719    24.995872    7.8269968    26.192661 
+    2450   0.85650449    1.6828907   -5.4683101    35.316669    0.3671827    25.280669    7.7040329    26.428476 
+    2500   0.84963707    1.7305222   -5.4605394    35.265508    0.1406965    25.236741    7.2780025    26.265232 
+    2550   0.84084365    1.8758368   -5.4497083    35.208725   0.33937687    24.544376    7.2334512    25.588067 
+    2600   0.85317342    1.7781674   -5.4702734    35.149747   0.60378248    24.046307    8.3370138    25.450554 
+    2650   0.85487644    2.0065374   -5.4747643    35.090431   0.22483651    24.937101    8.4669004    26.335288 
+    2700   0.84550083    1.9363031   -5.4628401    35.034349   0.43442577    24.250196    7.9943738    25.533939 
+    2750   0.85843419    2.0473138    -5.484528    34.980671   0.45959294     24.17438     8.179356    25.520629 
+    2800   0.86047607    2.0754522   -5.4899966    34.932466 0.00038123477    24.619856    8.3153434    25.986194 
+    2850   0.86375793    2.2751324   -5.4977459    34.892337 0.0016455263    24.927259     7.289789    25.971316 
+    2900   0.84438986    2.3790377   -5.4721407    34.863512    1.2372354    25.819445     7.132603    26.786523 
+    2950    0.8551438    2.2721926   -5.4925958     34.84473    1.5405388    25.956466     6.976385     26.87765 
+    3000   0.83737707    2.4009609   -5.4707188    34.834171   0.28507766    25.643879    6.1778846    26.377543 
+    3050   0.84923235    2.4187994   -5.4938573    34.830836  0.036512025    25.139252    7.1457857    26.135115 
+    3100   0.83872396    2.3811576   -5.4838787    34.833673     0.246984     24.21358    8.4588719    25.648586 
+    3150   0.83957817    2.3901421   -5.4913118     34.84163   0.20477984    24.309852    10.088243    26.319984 
+    3200   0.84283033      2.17292   -5.5025459    34.853975    1.3367154    24.581685     10.72011    26.817531 
+    3250   0.84002379    2.1247709   -5.5044955    34.866106   0.11434509    24.463842    9.4874246    26.239108 
+    3300   0.83311101    2.1492058   -5.5000847    34.875625 0.0053284993    23.815298    10.560222    26.051616 
+    3350   0.83216701    1.9587594   -5.5043446    34.881623   0.58985562    23.934253    11.475462    26.543073 
+    3400   0.82396039    2.1914951   -5.4971506    34.881199  0.098206955    23.393402     10.82936    25.778407 
+    3450   0.83483253    1.9783612   -5.5182844    34.877327 7.6571212e-05    23.675355    10.761012    26.006188 
+    3500   0.82712062    1.9718522   -5.5111818    34.869214  0.014836125    23.314122    11.312845    25.913872 
+    3550    0.8342927    1.9114357   -5.5259968    34.855179    1.4050442    22.442758    11.377192    25.161834 
+    3600   0.82631637    1.9836457   -5.5176244    34.835738  0.084637609    23.413286    10.824194    25.794285 
+    3650   0.82425697    1.9218541   -5.5178548    34.811901   0.11000071    22.788707    12.022258    25.765478 
+    3700   0.82491437    1.9624493    -5.521738    34.782417  0.034984027    23.011433    12.384217    26.132257 
+    3750   0.82758167    2.0856442   -5.5283493    34.748872  0.001362163    23.030662    12.122144    26.026098 
+    3800   0.81891108    1.9858824   -5.5177774    34.714618   0.17075993     23.21344    12.345683    26.292199 
+    3850   0.83392227    2.1631514   -5.5426333    34.681146   0.82106473    22.510204    11.678329    25.359272 
+    3900   0.82230654    2.0017132   -5.5276756    34.650221   0.48735732    23.444809    12.339117    26.493638 
+    3950   0.81929288    2.1749936   -5.5256673     34.61976  0.089219805    23.540062    11.527925    26.211211 
+    4000   0.83415169    2.0446791   -5.5506187    34.591266   0.15593937    23.742282     11.26508    26.279231 
+    4050   0.82362522    2.1998083   -5.5375157    34.563164   0.25405351    23.913834    11.081011    26.356408 
+    4100   0.82589505    2.3074345    -5.543718    34.537763  0.080213125     24.03253    10.435108    26.200266 
+    4150   0.83855297    2.2424199   -5.5658171    34.517758   0.62913338    23.974257    8.5079223    25.439138 
+    4200   0.82522111    2.2622619   -5.5493275    34.502472    1.8756517    25.754617    7.9996898    26.968414 
+    4250   0.82083124    2.4135193   -5.5464932      34.4919    1.1217436      25.8944    6.7070444    26.748914 
+    4300   0.83059704    2.1375109   -5.5653245    34.487366   0.53623038     26.05979    4.9072346    26.517798 
+    4350   0.82755047    2.1159821   -5.5650889    34.484506   0.10017723    25.405936    4.3532342    25.776195 
+    4400   0.83192877     2.180851   -5.5759565    34.480909  0.053664012    25.993034    2.9844338    26.163805 
+    4450   0.81860572    2.2333381   -5.5602138    34.477183  0.037864077    25.792233    1.9038859    25.862406 
+    4500   0.82821762    2.1142023   -5.5788682    34.474784  0.088221344     26.20329   0.59417897    26.210025 
+    4550    0.8205154    2.0896984   -5.5715531    34.472405  0.016076192    26.083166  -0.58187024    26.089655 
+    4600   0.81294948    2.2274108   -5.5642678    34.469014  0.033774986    25.869616   0.14951307    25.870048 
+    4650   0.80890532    2.1556346   -5.5622407    34.465277   0.67402048    25.413229  -0.56341819    25.419474 
+    4700   0.82070227    1.9852605    -5.583747    34.460206  0.052623237    26.158394  -0.44673492    26.162209 
+    4750   0.81451857    2.1097726   -5.5779782    34.451438   0.12221733    25.733718   -0.9911436    25.752798 
+    4800   0.81300453    2.0211325   -5.5790076    34.439504   0.34536082    26.358606   -1.7335167    26.415548 
+    4850   0.82035497    1.9489595   -5.5929886    34.424097   0.70899626    26.575865   -1.0191012    26.595397 
+    4900    0.8127066    2.1312269    -5.584271    34.405998  0.087959314    26.185217   -1.1329105    26.209713 
+    4950   0.81252621    2.1094866   -5.5866296    34.387869   0.79067667    26.564722   -1.8456354    26.628759 
+    5000   0.80575936    2.1875995    -5.579054    34.370679  0.031787364    26.027557   -2.2666774     26.12607 
+Loop time of 32.2397 on 1 procs for 5000 steps with 5120 atoms
+
+Performance: 53598.557 tau/day, 155.088 timesteps/s
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 22.967     | 22.967     | 22.967     |   0.0 | 71.24
+Neigh   | 2.9914     | 2.9914     | 2.9914     |   0.0 |  9.28
+Comm    | 0.37485    | 0.37485    | 0.37485    |   0.0 |  1.16
+Output  | 0.064337   | 0.064337   | 0.064337   |   0.0 |  0.20
+Modify  | 5.7143     | 5.7143     | 5.7143     |   0.0 | 17.72
+Other   |            | 0.1281     |            |       |  0.40
+
+Nlocal:    5120 ave 5120 max 5120 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    7962 ave 7962 max 7962 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    296101 ave 296101 max 296101 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 296101
+Ave neighs/atom = 57.8322
+Neighbor list builds = 283
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:32
diff --git a/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..c7a77fb9cca5d5c9b190946d6a47459ffd048952
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.pinning.g++.4
@@ -0,0 +1,186 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units       lj
+dimension   3
+boundary    p p p
+atom_style  atomic
+
+# truncated and shifted LJ potential
+pair_style  lj/cut 2.5
+pair_modify shift yes
+read_data   data.halfhalf
+  orthogonal box = (0 0 0) to (12.8152 12.8152 34)
+  1 by 1 by 4 MPI processor grid
+  reading atoms ...
+  5120 atoms
+  reading velocities ...
+  5120 velocities
+pair_coeff  1 1 1.0 1.0 2.5
+mass        1 1.0
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+
+velocity all create 0.8 1 mom yes rot yes
+fix ensemble all npt temp 0.8 0.8 4.0 z 2.185 2.185 8.0
+fix 100 all momentum 100 linear 1 1 1
+
+# harmonic rho_k bias-field
+#                 nx ny nz k     a
+fix bias all rhok 16 0  0  4.0   26.00
+
+# output                                U_bias rho_k_RE  rho_k_IM |rho_k|
+thermo_style custom step temp pzz pe lz f_bias f_bias[1] f_bias[2] f_bias[3]
+thermo 50
+dump dumpXYZ all xyz 500 traj.xyz
+
+# NOTE: run reduced for demonstration purposes
+# run 50000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 22
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.023 | 4.027 | 4.03 Mbytes
+Step Temp Pzz PotEng Lz f_bias f_bias[1] f_bias[2] f_bias[3] 
+       0          0.8    5.1872229   -4.8361269           34    152.02869    34.711006  -0.72709593     34.71862 
+      50    1.0819371    3.9250728   -5.2655842     34.02563   0.51385908    26.505979    0.2187864    26.506882 
+     100    1.1182271    3.5397251   -5.3331401    34.100753    2.1059904    27.025883  -0.12127124    27.026156 
+     150    1.1121434    3.7845571   -5.3440494    34.213993    1.7206575    26.894862   -1.3261751    26.927539 
+     200    1.1446439    3.4114364   -5.4199119    34.358914     2.383615    27.054401   -1.4211151    27.091699 
+     250    1.1115073    3.5603047   -5.3988013    34.517397   0.60819391     26.51404   -1.4089688     26.55145 
+     300    1.0828478    3.7411116   -5.3842818    34.673948   0.73987465    26.528178    -2.062382    26.608225 
+     350    1.0342597    3.9206217   -5.3384367    34.825105   0.99965014    26.657737   -1.6211152    26.706983 
+     400    1.0064356    3.9929044    -5.324003     34.97579   0.41927007    26.402623   -1.7087432    26.457859 
+     450   0.96799277    4.2764255   -5.2947175    35.133839   0.77739461    26.503753   -2.5217998    26.623456 
+     500   0.94691076    4.1007962   -5.2922124    35.301893   0.17015805    26.212252   -2.0421698    26.291683 
+     550   0.93675297    3.7404088   -5.3056917    35.474301   0.56247039    26.335926    -3.205722    26.530316 
+     600   0.92707577    3.5439822   -5.3176094    35.641282   0.04054693    25.679461   -3.0301039    25.857615 
+     650   0.91828226    3.1833648   -5.3278237    35.794766    0.8427054      26.4003   -3.6331211    26.649117 
+     700    0.9084826    2.8374306   -5.3327944    35.928138    1.5635222    26.605971   -3.8575939    26.884173 
+     750   0.91219559      2.46172   -5.3548299    36.039156    6.3772911    27.350725   -4.8971146    27.785678 
+     800   0.90000337    2.2187716   -5.3499181    36.126451    4.9080838    27.085156   -5.1291678    27.566538 
+     850    0.9003432    1.8634244   -5.3614648    36.189019 0.0081092188    25.497333   -5.4038153    26.063676 
+     900   0.89741573    1.5697398   -5.3660799    36.226074  0.011155479    25.312724   -6.2574069    26.074684 
+     950   0.88871454    1.4427205   -5.3604669    36.237407    0.3287078    25.659237    -6.232896    26.405406 
+    1000   0.88606353    1.3088636   -5.3626576    36.226015   0.30068168    24.554896   -7.2832017    25.612262 
+    1050   0.88527541    1.3194263   -5.3666773     36.19311   0.10646314    24.514921   -7.9419424     25.76928 
+    1100   0.87522001    1.2852124   -5.3556811    36.143056   0.13675329    24.865981    -8.446822    26.261489 
+    1150    0.8805978     1.246973   -5.3671716      36.0781 0.00043275463    24.187039   -9.4985495     25.98529 
+    1200   0.85711495     1.376588   -5.3346243    36.002427   0.47623639    23.691349   -9.4648541    25.512026 
+    1250   0.88116805    1.3562001   -5.3731036    35.919289   0.32797055    23.322103    -10.54448    25.595049 
+    1300   0.87178482    1.5046564   -5.3610798    35.831278   0.17704849    24.190231   -10.314689     26.29753 
+    1350   0.87022621    1.6830825   -5.3603618    35.743318 0.0052854997    23.731157   -10.747465    26.051408 
+    1400   0.89019669    1.6144812   -5.3921986    35.659687    1.4152796      22.8393   -10.551347    25.158787 
+    1450   0.88852819    1.7587964   -5.3918592    35.580319   0.63560961    23.599033      -12.195    26.563742 
+    1500   0.89029085    1.8772498   -5.3966098    35.509232   0.20895386    23.055083   -12.703366    26.323229 
+    1550   0.88639722    2.2284824   -5.3933288    35.449043   0.44413965    22.448774   -12.156068    25.528757 
+    1600   0.88816451    2.2167704   -5.3994757    35.401661   0.12210235    23.108351    -12.44643    26.247085 
+    1650   0.89154791    2.3397824   -5.4086923    35.365815    0.4820208    23.090699   -12.984179    26.490928 
+    1700   0.88518032    2.5351236   -5.4041601    35.343757  0.080806002    22.749825    -12.99762    26.201005 
+    1750   0.86848721    2.5527491   -5.3851928    35.336433  0.045102165    22.357111   -13.564328     26.15017 
+    1800   0.88501061    2.5215825   -5.4169341    35.340849   0.27488483    22.086584   -14.408273    26.370732 
+    1850    0.8716061    2.5809558   -5.4045854    35.355038  0.042909785    21.270956   -14.695278    25.853525 
+    1900   0.85672517    2.4836326   -5.3902797    35.375469   0.72877764    21.639909   -15.474764    26.603646 
+    1950   0.85133731    2.3141629   -5.3902573    35.398523 0.0016908803    21.106617   -15.132733    25.970924 
+    2000   0.86152109    2.1562002   -5.4132601    35.419851     0.371016    21.325237   -15.614625    26.430706 
+    2050   0.86243551     2.019931   -5.4220349    35.436069  0.017935421      20.4131   -16.255418    26.094698 
+    2100   0.87417672    1.8083823   -5.4464117    35.445091   0.18429432     19.75625   -17.365705    26.303558 
+    2150   0.85872128    1.7608768   -5.4293103     35.44341   0.91209166    20.149648   -17.480387    26.675312 
+    2200   0.86615373    1.8372778   -5.4458315    35.430616   0.10151993    18.559234   -17.885469      25.7747 
+    2250   0.85053605    1.7198437   -5.4272104    35.408688   0.96154548    17.200861   -18.562206    25.306622 
+    2300   0.85400281    1.7939644   -5.4364682    35.377708   0.12283263    18.759325   -18.358539    26.247823 
+    2350   0.85495278    1.5856029   -5.4417321    35.337987   0.20564329    18.967923   -18.248149    26.320658 
+    2400   0.84606771    1.7782708   -5.4315646    35.287411   0.10063977    19.185527   -17.878215    26.224321 
+    2450   0.85210051    1.8190391   -5.4432116    35.232321   0.69988647    19.268861   -18.325448     26.59156 
+    2500   0.85304715    1.7466204   -5.4470889    35.175245 0.0048314937     18.09176    -18.74157     26.04915 
+    2550   0.85401123    1.8601945   -5.4509309    35.115748   0.99467901    17.170045   -18.574587    25.294777 
+    2600   0.85778606     1.974012   -5.4586742    35.058013 0.0026599702    17.438966   -19.333395    26.036469 
+    2650    0.8521239    2.0606329   -5.4526006    35.003616  0.091056354     17.16363   -19.244738    25.786627 
+    2700   0.85918482    2.0766792   -5.4658947    34.954171   0.89590606     15.77108   -19.822153    25.330707 
+    2750   0.85786577     2.225549   -5.4667773    34.911468   0.26577575    15.769018   -21.128817    26.364538 
+    2800   0.86764664    2.2325018   -5.4849414    34.877604   0.47167555    14.950515   -20.675229    25.514369 
+    2850   0.85209564    2.3434319    -5.465734    34.852715    2.7350296     13.51553   -20.829996    24.830592 
+    2900   0.85757283    2.3512971   -5.4786051    34.836138   0.14816492     14.06033   -21.545946    25.727819 
+    2950   0.86098926    2.3480431   -5.4890615    34.826408   0.26401534    13.381395   -22.714827    26.363329 
+    3000   0.85413421    2.3243973   -5.4844129    34.823242  0.024244334    12.739486   -22.538687    25.889899 
+    3050   0.85015323    2.5479266   -5.4844303    34.825228    0.4463147    12.990582   -21.975063    25.527605 
+    3100    0.8530523    2.3643505    -5.495343    34.834883   0.12144265    12.844293   -22.321989    25.753583 
+    3150   0.85098478    2.2521299   -5.4990526    34.848419   0.33194916    12.747856   -23.126671      26.4074 
+    3200   0.84391449    2.2650057    -5.495222    34.862626  0.031888328    12.788845   -22.782174     26.12627 
+    3250   0.84807155    2.1715388   -5.5080873    34.877548  0.082426694    13.316219    -22.09441    25.796989 
+    3300   0.83028242     2.242889   -5.4878846     34.89175    1.1334975    14.326678   -22.593363    26.752827 
+    3350   0.82924001    2.0324002   -5.4924558    34.903232   0.35473989    14.354166   -22.181868    26.421153 
+    3400   0.83032841    2.0003371   -5.4997142    34.908733  0.041677437    14.528378   -21.735998    26.144356 
+    3450   0.82908891    1.8683902   -5.5029185    34.907936   0.02365857    15.069507   -21.053887    25.891237 
+    3500   0.82842914    1.9165344   -5.5064218    34.898681   0.17663531     15.27043   -20.674834    25.702817 
+    3550   0.82735822      1.98221   -5.5088197     34.88272    1.5607134    14.915228   -20.208431    25.116622 
+    3600   0.82642915    1.8422766   -5.5110752      34.8611    1.1861112    15.312314   -20.051953    25.229899 
+    3650   0.82556781    1.9351408   -5.5130349    34.833406     1.018872    16.152478   -19.454871    25.286252 
+    3700   0.82360651    1.9791184   -5.5128431    34.802021   0.14080727    16.907104   -19.401616    25.734663 
+    3750   0.83017793    1.9855734   -5.5253254    34.768644   0.15311334    16.969506   -19.331958    25.723311 
+    3800   0.82362926    2.1029656   -5.5179624    34.734178   0.10807487    17.892584   -18.542426     25.76754 
+    3850   0.82313508    2.0781681   -5.5196175     34.70093   0.13343085    19.072706    -17.28778    25.741707 
+    3900   0.83643385    2.0570262   -5.5421224    34.669761 0.00022792038    19.551677     -17.1548    26.010675 
+    3950   0.82346174    2.0842322   -5.5252757    34.640849 0.0093759386    20.892792   -15.590263    26.068469 
+    4000   0.83485868    2.1196669   -5.5451736    34.612396   0.31198053    21.630258   -15.126984    26.394956 
+    4050   0.82729429    2.2033274   -5.5365945    34.585721   0.53752252    21.283533   -14.011497    25.481578 
+    4100   0.82040242    2.1757309   -5.5292269    34.562271   0.36031984    22.047609   -12.961927    25.575548 
+    4150   0.81932521     2.285666   -5.5307807    34.542102   0.84343149    22.486289    -11.70555    25.350604 
+    4200   0.83819319     2.231174   -5.5625532    34.526447   0.47190752    23.311855    -12.57189    26.485751 
+    4250   0.82542274    2.1874789   -5.5472057    34.513795   0.70518398    23.411553   -12.614639    26.593795 
+    4300   0.81971158     2.241167   -5.5424504    34.503969   0.26707612    23.089805   -12.727793    26.365429 
+    4350   0.83255377    2.1295532   -5.5657895    34.496326  0.072548591    23.003138    -12.52181    26.190458 
+    4400    0.8128474    2.3327845   -5.5402264    34.490126 0.0013023434    23.020811   -12.029795    25.974482 
+    4450   0.82013491    2.3069915   -5.5554953    34.488039  0.041123896    23.632908   -11.178674    26.143394 
+    4500   0.81411544    2.2247193   -5.5509183    34.488014   0.54440601    23.010678   -10.938506    25.478269 
+    4550   0.82814624    2.1142779   -5.5763482    34.487885    0.1518945    23.696817   -11.351972    26.275585 
+    4600   0.82929492     2.090881   -5.5823492    34.486698 0.0045520899    23.538527   -10.929741    25.952292 
+    4650   0.81061417    1.9818043   -5.5584018    34.484038  0.012526806    23.993543   -10.219174    26.079142 
+    4700   0.81816105    1.9605811   -5.5735005    34.476764    1.2079835    25.151166   -9.1888856    26.777169 
+    4750   0.81657042    2.0064313   -5.5744795    34.465784    1.2045017    25.487486   -8.2063886    26.776048 
+    4800   0.81789335    2.0838696   -5.5796632    34.451996   0.27642542    24.647157    -7.023095     25.62823 
+    4850   0.80649339    1.9892413   -5.5654796    34.436067  0.024697945     25.09823   -6.3492244    25.888874 
+    4900   0.81673441    2.0125635   -5.5835037    34.416236 0.0011188576    25.446818   -5.2182483    25.976348 
+    4950   0.82250033    1.9770391   -5.5946082    34.394723   0.72696707     26.37002   -3.5122842    26.602896 
+    5000   0.80762758     2.075517   -5.5746076    34.371696   0.12796344    26.102184   -2.8094827    26.252946 
+Loop time of 10.3394 on 4 procs for 5000 steps with 5120 atoms
+
+Performance: 167127.370 tau/day, 483.586 timesteps/s
+99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.029      | 6.0128     | 7.1918     |  35.0 | 58.15
+Neigh   | 0.65673    | 0.75825    | 0.88597    |  10.3 |  7.33
+Comm    | 0.43982    | 1.5284     | 2.4112     |  60.5 | 14.78
+Output  | 0.022835   | 0.023039   | 0.023453   |   0.2 |  0.22
+Modify  | 1.7294     | 1.9472     | 2.5687     |  25.7 | 18.83
+Other   |            | 0.06978    |            |       |  0.67
+
+Nlocal:    1280 ave 1404 max 1214 min
+Histogram: 2 0 0 1 0 0 0 0 0 1
+Nghost:    3521.25 ave 3581 max 3426 min
+Histogram: 1 0 0 0 0 0 1 1 0 1
+Neighs:    73872.2 ave 87973 max 64161 min
+Histogram: 1 1 0 0 1 0 0 0 0 1
+
+Total # of neighbors = 295489
+Ave neighs/atom = 57.7127
+Neighbor list builds = 278
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:10
diff --git a/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1 b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..8606d4ed944137c289d272ad94a0eb47ad5bee86
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.1
@@ -0,0 +1,141 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units lj
+dimension 3
+boundary p p p
+atom_style atomic
+
+# truncated and shifted LJ potential
+pair_style lj/cut 2.5
+pair_modify shift yes
+
+# fcc lattice
+lattice	fcc 0.9731
+Lattice spacing in x,y,z = 1.6019 1.6019 1.6019
+region my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379)
+  1 by 1 by 1 MPI processor grid
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+Created 5120 atoms
+pair_coeff 1 1 1.0 1.0 2.5
+mass 1 1.0
+change_box all z final 0.0 34 remap units box
+  orthogonal box = (0 0 0) to (12.8152 12.8152 34)
+
+# select particles in one side of the elongated box
+region left plane 0 0 10 0 0 1
+group left region left
+2688 atoms in group left
+
+velocity left create 6.0 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble left nve     # Note: only move particle in left-hand side
+fix langevin left langevin 3.0 0.8 100.0 2017
+
+# outout
+thermo_style custom step temp pzz pe lz
+thermo 100
+# dump dumpXYZ all xyz 100 traj.xyz
+
+# run reduced for demonstration purposes
+# run 10000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 22
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.524 | 4.524 | 4.524 Mbytes
+Step Temp Pzz PotEng Lz 
+       0    3.1494433   -3.4735106   -6.8707307           34 
+     100    1.7727555    6.5330255   -4.8035477           34 
+     200    1.7462368    7.0070325   -4.7646426           34 
+     300    1.7564888    6.6190123   -4.7894637           34 
+     400    1.7641186     6.609684   -4.8064772           34 
+     500    1.7383511    6.7304936   -4.7708095           34 
+     600     1.731708    6.8574656   -4.7612918           34 
+     700    1.7332167    6.6530919   -4.7670014           34 
+     800    1.7487537    6.5644963   -4.7907458           34 
+     900    1.7353648    6.7115188   -4.7772149           34 
+    1000     1.728878    6.4175719   -4.7797216           34 
+    1100    1.7471488    6.5346083    -4.813376           34 
+    1200    1.7188149    6.2502104   -4.7822235           34 
+    1300    1.7151194     6.792534   -4.7781701           34 
+    1400    1.7406603    6.6639592   -4.8170174           34 
+    1500    1.7090537    6.4677579    -4.770701           34 
+    1600    1.7014954    6.2853535   -4.7679742           34 
+    1700    1.7064354    6.4352857   -4.7812978           34 
+    1800    1.7169971    6.5808758    -4.799426           34 
+    1900    1.6822712    6.3746758   -4.7522464           34 
+    2000    1.7126546     6.534969   -4.8091595           34 
+    2100    1.7086108    6.4679932   -4.8146664           34 
+    2200    1.6974952    6.3802129   -4.8052505           34 
+    2300    1.6868035    6.4009243   -4.7935769           34 
+    2400    1.7107125    6.2318869   -4.8358765           34 
+    2500     1.660241    6.4891487   -4.7661183           34 
+    2600    1.6801816    6.1988356   -4.8024291           34 
+    2700    1.6940298    6.1328187   -4.8290053           34 
+    2800    1.6755061    6.4150693   -4.8145473           34 
+    2900    1.6749928    6.4248792   -4.8213509           34 
+    3000    1.6310737    6.6491291   -4.7673027           34 
+    3100    1.6559915    6.2726719   -4.8109181           34 
+    3200    1.6574579    5.7132029   -4.8189484           34 
+    3300    1.6816136    5.7697439   -4.8652811           34 
+    3400    1.6489483    6.4463349   -4.8247812           34 
+    3500    1.6557974    5.9763333   -4.8383712           34 
+    3600    1.6215459    6.2806534   -4.7954657           34 
+    3700    1.6484987    6.0671609   -4.8470777           34 
+    3800    1.6473922    5.8688108   -4.8555351           34 
+    3900    1.6435957     5.930425   -4.8562076           34 
+    4000    1.6514434    6.1962122    -4.872998           34 
+    4100    1.6138337    6.4808124   -4.8219373           34 
+    4200    1.6215239    5.9467966   -4.8412146           34 
+    4300    1.6129295    5.9377323   -4.8414596           34 
+    4400    1.6020549    6.1104301   -4.8395939           34 
+    4500    1.6047738    6.0816222   -4.8538151           34 
+    4600    1.6053565     6.183466   -4.8686817           34 
+    4700    1.6088152    5.7416542    -4.894114           34 
+    4800    1.5954309     5.694319   -4.8840198           34 
+    4900    1.5582564    6.1199614   -4.8429998           34 
+    5000    1.5786672    5.8813574   -4.8907344           34 
+Loop time of 28.3867 on 1 procs for 5000 steps with 5120 atoms
+
+Performance: 60873.483 tau/day, 176.139 timesteps/s
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 22.269     | 22.269     | 22.269     |   0.0 | 78.45
+Neigh   | 4.7222     | 4.7222     | 4.7222     |   0.0 | 16.64
+Comm    | 0.40821    | 0.40821    | 0.40821    |   0.0 |  1.44
+Output  | 0.0042329  | 0.0042329  | 0.0042329  |   0.0 |  0.01
+Modify  | 0.88231    | 0.88231    | 0.88231    |   0.0 |  3.11
+Other   |            | 0.1005     |            |       |  0.35
+
+Nlocal:    5120 ave 5120 max 5120 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    7768 ave 7768 max 7768 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    297167 ave 297167 max 297167 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 297167
+Ave neighs/atom = 58.0404
+Neighbor list builds = 474
+Dangerous builds = 246
+write_data data.halfhalf
+Total wall time: 0:00:28
diff --git a/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4 b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..14088f7c95cfd220b3895a8d3861a70ec093c849
--- /dev/null
+++ b/examples/USER/misc/rhok/log.22Sep2017.setup.g++.4
@@ -0,0 +1,141 @@
+LAMMPS (22 Sep 2017)
+  using 1 OpenMP thread(s) per MPI task
+units lj
+dimension 3
+boundary p p p
+atom_style atomic
+
+# truncated and shifted LJ potential
+pair_style lj/cut 2.5
+pair_modify shift yes
+
+# fcc lattice
+lattice	fcc 0.9731
+Lattice spacing in x,y,z = 1.6019 1.6019 1.6019
+region my_box block 0 8.0   0 8.0   0 20.0
+create_box 1 my_box
+Created orthogonal box = (0 0 0) to (12.8152 12.8152 32.0379)
+  1 by 1 by 4 MPI processor grid
+region particles block 0 8.0 0 8.0 0 20.0
+create_atoms 1 region particles
+Created 5120 atoms
+pair_coeff 1 1 1.0 1.0 2.5
+mass 1 1.0
+change_box all z final 0.0 34 remap units box
+  orthogonal box = (0 0 0) to (12.8152 12.8152 34)
+
+# select particles in one side of the elongated box
+region left plane 0 0 10 0 0 1
+group left region left
+2688 atoms in group left
+
+velocity left create 6.0 1 mom yes rot yes
+
+# simulation parameters
+neighbor	0.6 bin
+timestep	0.004
+run_style 	verlet
+fix ensemble left nve     # Note: only move particle in left-hand side
+fix langevin left langevin 3.0 0.8 100.0 2017
+
+# outout
+thermo_style custom step temp pzz pe lz
+thermo 100
+# dump dumpXYZ all xyz 100 traj.xyz
+
+# run reduced for demonstration purposes
+# run 10000
+run 5000
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.1
+  ghost atom cutoff = 3.1
+  binsize = 1.55, bins = 9 9 22
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.23 | 3.23 | 3.23 Mbytes
+Step Temp Pzz PotEng Lz 
+       0    3.1494433   -3.4735106   -6.8707307           34 
+     100    1.7914373    6.4805818   -4.8420353           34 
+     200     1.740256    6.6108149   -4.7672571           34 
+     300    1.7663827    6.5188941   -4.8103672           34 
+     400    1.7440644    6.5156543   -4.7769467           34 
+     500    1.7471724    6.5208992   -4.7843928           34 
+     600    1.7320106    6.6557835   -4.7654637           34 
+     700    1.6839043    6.7689759   -4.7045352           34 
+     800    1.7216746      6.66436   -4.7601673           34 
+     900    1.7342542    6.3242367   -4.7790803           34 
+    1000    1.7338566    6.5803438   -4.7854529           34 
+    1100    1.7328856    6.3846366   -4.7902625           34 
+    1200    1.7546906    6.5048137   -4.8213443           34 
+    1300    1.7163891    6.3903221   -4.7665145           34 
+    1400    1.7011627    6.5782672   -4.7517875           34 
+    1500    1.7105234    6.5811813   -4.7677748           34 
+    1600    1.7334403    6.5032837   -4.8067749           34 
+    1700    1.7252102    6.5443871   -4.8058994           34 
+    1800     1.721958    6.3378188   -4.8150073           34 
+    1900    1.6797892    6.6780506   -4.7538618           34 
+    2000    1.7001774    6.3578192   -4.7894018           34 
+    2100    1.7127784    6.3219105   -4.8161059           34 
+    2200     1.696825     6.536793   -4.7946902           34 
+    2300    1.6704578    6.7186933   -4.7609628           34 
+    2400    1.6772498    6.3432817   -4.7778471           34 
+    2500    1.7073862    6.2153226   -4.8299181           34 
+    2600    1.6951557    6.4397257   -4.8156787           34 
+    2700    1.6845984    6.0123544   -4.8136864           34 
+    2800    1.6550565    6.2489392   -4.7829639           34 
+    2900    1.6892315     6.158499   -4.8423004           34 
+    3000    1.6814436      6.07976   -4.8400696           34 
+    3100    1.6387025     6.330166   -4.7878978           34 
+    3200    1.6747855    6.0767043   -4.8481995           34 
+    3300    1.6508768    6.2749233   -4.8181888           34 
+    3400    1.6426364    6.3934935   -4.8223824           34 
+    3500    1.6576512    6.0638185   -4.8559078           34 
+    3600    1.6444173    6.1376573   -4.8463113           34 
+    3700    1.6480039    5.9943705   -4.8601776           34 
+    3800    1.6467212    6.0556591   -4.8722719           34 
+    3900    1.6271804     6.116738   -4.8547278           34 
+    4000    1.6158134    5.9089534   -4.8477829           34 
+    4100    1.6388157    5.9890465   -4.8920284           34 
+    4200    1.6182368    6.0639887   -4.8724963           34 
+    4300     1.647633    5.6333906   -4.9267536           34 
+    4400    1.5856411    6.2675475   -4.8471239           34 
+    4500    1.5773417    6.1789163   -4.8469057           34 
+    4600    1.6181445    5.7988068    -4.922419           34 
+    4700    1.5876712    5.7398111   -4.8853849           34 
+    4800    1.5708353    6.2204997   -4.8718872           34 
+    4900    1.5514708    5.9782256   -4.8523812           34 
+    5000     1.553347    5.9286523     -4.86582           34 
+Loop time of 8.10259 on 4 procs for 5000 steps with 5120 atoms
+
+Performance: 213265.164 tau/day, 617.087 timesteps/s
+99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.2964     | 5.6236     | 5.8982     |   9.0 | 69.40
+Neigh   | 1.0562     | 1.1907     | 1.3257     |   8.7 | 14.70
+Comm    | 0.43963    | 0.98786    | 1.5968     |  42.5 | 12.19
+Output  | 0.0023124  | 0.004741   | 0.0090873  |   4.0 |  0.06
+Modify  | 0.018652   | 0.22213    | 0.39884    |  36.4 |  2.74
+Other   |            | 0.07357    |            |       |  0.91
+
+Nlocal:    1280 ave 1337 max 1211 min
+Histogram: 1 0 0 0 0 1 1 0 0 1
+Nghost:    3416.25 ave 3549 max 3297 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+Neighs:    74269.8 ave 77932 max 69612 min
+Histogram: 1 0 0 0 0 1 0 1 0 1
+
+Total # of neighbors = 297079
+Ave neighs/atom = 58.0232
+Neighbor list builds = 474
+Dangerous builds = 247
+write_data data.halfhalf
+Total wall time: 0:00:08
diff --git a/examples/USER/uef/README b/examples/USER/uef/README
new file mode 100644
index 0000000000000000000000000000000000000000..aaf53fa2a115a38ec297f70e0db67b6503f2a180
--- /dev/null
+++ b/examples/USER/uef/README
@@ -0,0 +1,19 @@
+This directory contains two short example scripts for the USER-UEF
+package.
+
+
+nvt_uniaxial:
+  A simulation of a WCA fluid under constant volume uniaxial strain
+  with stretching in the z direction, compression in the x and y
+  directions. The xx, yy and zz components of the pressure tensor are
+  written to the log file. The simulation conditions are the same as
+  those used in the paper by Thomas Hunt [http://arxiv.org/abs/1310.3905]
+  and should yield a zz pressure near 5 and xx/yy pressures near 10.
+
+npt_biaxial:
+  A simulation of a WCA fluid under stress-controlled biaxial strain 
+  with stretching in the x and y directions, compression in the z 
+  direction. The zz stress is controlled using the ext keyword in fix
+  npt/uef. The xx, yy and zz components of the pressure tensor are
+  written to the log file. The simulation conditions should yield
+  xx/yy pressures near 5 and a zz pressure near 10.
diff --git a/examples/USER/uef/npt_biaxial/data.wca b/examples/USER/uef/npt_biaxial/data.wca
new file mode 100644
index 0000000000000000000000000000000000000000..889ba4d2f2a8d2cb3892bda144544f23140ccc03
--- /dev/null
+++ b/examples/USER/uef/npt_biaxial/data.wca
@@ -0,0 +1,1022 @@
+LAMMPS data file via write_data, version 14 May 2016, timestep = 400000
+
+500 atoms
+1 atom types
+
+0.0000000000000000e+00 8.3979809569125372e+00 xlo xhi
+0.0000000000000000e+00 8.3979809569125372e+00 ylo yhi
+0.0000000000000000e+00 8.3979809569125372e+00 zlo zhi
+
+Masses
+
+1 1
+
+Pair Coeffs # lj/cut
+
+1 1 1
+
+Atoms # atomic
+
+7 1 2.4137737201272674e-01 2.0017433398687118e-01 3.6157393311703534e-02 2 1 2
+398 1 1.6739594898193126e+00 3.0313452824803544e-01 5.7986723161362197e-01 -3 0 1
+173 1 6.1370793327804556e+00 4.4978954867119525e-01 3.6568951377817088e-01 -4 1 0
+462 1 7.8754716784931862e+00 5.2908038473333074e-01 7.9185633743762940e-01 1 1 -2
+383 1 6.5373096080170212e-01 1.5337525983981986e+00 6.3208419126059423e-01 2 0 1
+288 1 1.9354124990672374e+00 1.3318105136786291e+00 3.6211635210084403e-01 -1 1 0
+303 1 2.9661308460819318e+00 1.1444058564802859e+00 6.5604777151148241e-01 -2 2 1
+491 1 7.1514675802966758e+00 1.3588685826107376e+00 1.2875068928885325e-01 -1 0 0
+187 1 3.0670386025081497e-01 2.4850915964494620e+00 7.0944157374329464e-02 0 -1 0
+163 1 1.2805212773629451e+00 2.4034621328433090e+00 2.0184086197146742e-01 0 1 -1
+345 1 2.5952244948945173e+00 2.3405536448220743e+00 3.0019149048630317e-01 -3 2 1
+447 1 3.9059163101716741e+00 1.6849179478858272e+00 6.0306488750926446e-01 -2 3 -4
+108 1 4.9215629854759335e+00 2.5174843149522088e+00 6.2779912940916158e-02 1 0 -1
+427 1 6.3978861821382305e+00 1.9201774263431104e+00 4.3981049730412797e-01 -1 -2 -3
+96 1 7.4061541304605534e+00 2.4828308207907996e+00 2.4705543772216432e-01 0 -3 2
+494 1 8.0759482343563711e+00 1.6914241739458744e+00 6.8482420189075921e-01 -3 1 2
+360 1 3.1886448226656765e+00 3.1126224743956139e+00 5.4052217066940123e-01 1 1 -2
+179 1 4.2399500260508116e+00 2.8640916432630559e+00 7.5278462735962870e-01 -1 1 1
+136 1 6.2603021059339365e+00 3.2638426060411403e+00 9.9163685662983045e-02 -2 0 3
+311 1 8.0699058291845773e+00 3.1228722160017734e+00 7.5458759573094458e-01 -3 -3 3
+242 1 8.1363443818268044e-02 3.8061968817045986e+00 5.4644298839992535e-02 1 1 1
+451 1 1.9291602479953753e+00 3.5896469823727863e+00 2.3008537852985023e-01 -1 0 0
+118 1 5.3566912079946780e+00 3.5234509548370374e+00 6.7801317822012042e-01 1 -3 -1
+200 1 6.7588369512940338e-01 6.1719413314115790e-01 9.9275606042642373e-01 -1 2 3
+23 1 2.1873327028525060e+00 4.2754803645009956e-02 1.4143560341384132e+00 -3 -1 0
+30 1 3.9592898204338112e+00 6.8093268022015185e-01 1.2987004742604558e+00 -1 -2 -2
+26 1 5.1175076813830467e+00 5.1869033157649591e-01 9.5797839381311178e-01 0 1 2
+452 1 6.5582763761917411e+00 3.4514602671261341e-01 1.2368598815410630e+00 -2 0 -2
+439 1 8.3865135868155445e+00 1.4825845267600596e-01 1.6737363681796917e+00 -2 3 1
+492 1 1.6645816560197648e+00 9.7557398318995125e-01 1.3973497820249918e+00 -2 3 1
+287 1 2.8845951500519784e+00 9.2974386224238703e-01 1.6104516406361071e+00 2 -1 2
+87 1 5.9282888423658395e+00 1.2716232081523247e+00 1.1744299163086194e+00 -1 2 -1
+191 1 7.0465887128985001e+00 1.3447129135151838e+00 1.1378718594147066e+00 -1 2 0
+316 1 6.6785856436835267e-01 2.4243200282222617e+00 1.1253836626025919e+00 0 1 0
+176 1 1.8165460632802826e+00 2.1437252128100086e+00 1.0868234266569741e+00 1 1 1
+244 1 3.5041229236927576e+00 2.3924032537771125e+00 1.3551249499971583e+00 -3 1 4
+463 1 4.9238071200835165e+00 1.7488964460038467e+00 8.7393010597649756e-01 -2 2 1
+101 1 4.7502017837518187e+00 2.5070270251466997e+00 1.6042073679153410e+00 -3 0 -2
+456 1 6.7262574842655534e+00 2.1937608587635533e+00 1.5819520420856528e+00 0 2 3
+90 1 7.8106680805872131e+00 2.3465270292028992e+00 1.4066520475037838e+00 0 -4 1
+457 1 2.2543678933961773e+00 3.1652213402916889e+00 1.1451988834668292e+00 -1 0 0
+344 1 5.9751963550424136e+00 2.8740523385222234e+00 1.2530570351392452e+00 0 1 -1
+399 1 8.2525481582297566e-01 3.4796551803028120e+00 1.0452539946382762e+00 1 -1 -1
+472 1 2.5500022667255768e+00 4.1144011701979659e+00 1.1937493716055980e+00 0 1 2
+364 1 3.2074279852156709e+00 3.3909867375667604e+00 1.5492265188845586e+00 -1 0 0
+333 1 4.2063776055408209e+00 3.7436012461483292e+00 1.2983951160694676e+00 -1 1 1
+223 1 6.0720407506223024e+00 3.9437056464141231e+00 1.5634726007362729e+00 0 0 1
+277 1 6.8850289844945918e+00 3.5744109988378070e+00 1.1843729982426427e+00 2 0 0
+487 1 8.0236797280148657e+00 4.0759691449476652e+00 1.1816920447826709e+00 -1 0 5
+202 1 1.1425010515906946e+00 1.1629395296284512e-02 2.5086432758529211e+00 -1 -2 0
+139 1 2.3147555111337756e+00 1.8665688332261610e-01 2.4220734387693037e+00 -5 0 1
+218 1 3.5367098631556342e+00 6.2760250051756761e-01 2.4478818232869410e+00 1 0 -2
+10 1 4.3729774450168737e+00 1.6632191047636544e-01 2.1079853667170236e+00 -2 -1 -2
+64 1 5.7303792911825742e+00 2.9006680684615282e-01 1.9605629224377070e+00 -2 1 2
+40 1 6.5824557178778531e-01 8.5028950299011019e-01 2.1496168105059708e+00 2 1 1
+328 1 1.5589817713112594e-01 1.6225523918451032e+00 1.7310231111691350e+00 3 0 0
+292 1 2.1314993621742819e+00 1.2260233186264009e+00 2.3652294967955245e+00 0 0 -1
+245 1 4.6666032401598603e+00 1.5042368206958292e+00 1.9142905720694421e+00 -3 1 -1
+148 1 6.7720428171922631e+00 8.7008110148972428e-01 2.0934267451930806e+00 0 0 1
+435 1 7.7341388550057273e+00 9.1983544373491155e-01 1.8223856537522831e+00 -2 -1 1
+301 1 2.9295611364078922e-01 2.4942786265027763e+00 2.2355337509297120e+00 -2 -3 -2
+476 1 1.3648021789963285e+00 1.8605609454926342e+00 2.0851101905225424e+00 1 0 -1
+310 1 2.6428836469132526e+00 2.1636738846129742e+00 1.7827293416008847e+00 3 2 0
+137 1 3.6368685754799719e+00 1.7198335009724290e+00 2.1014306554853008e+00 1 -1 1
+434 1 5.6400362255303138e+00 2.0264029757942144e+00 1.7648993119896872e+00 0 -1 0
+184 1 7.6228520959849160e+00 2.1704001606029473e+00 2.4690779972373509e+00 0 1 -1
+5 1 1.5547914352159820e+00 3.0246432147284117e+00 1.9341058666944422e+00 5 1 -2
+41 1 3.5394530649909939e+00 2.8672493763709368e+00 2.3721754433370239e+00 0 0 0
+410 1 5.2622202291807252e+00 3.3532222678989076e+00 2.0249715425869179e+00 2 1 1
+418 1 6.2986243250817040e+00 2.9042203242504363e+00 2.3450580832445986e+00 -1 2 0
+34 1 6.5845359986575269e-01 3.5330771386953552e+00 2.1708302646106170e+00 -2 3 1
+67 1 2.2321109286615104e+00 3.8391986294826608e+00 2.3598318070524229e+00 1 -1 0
+390 1 4.4001972293445766e+00 3.7784400898230768e+00 2.3530190506208175e+00 1 2 -1
+125 1 6.8584035481270371e+00 3.7859398002695177e+00 2.2262509649550242e+00 -3 1 3
+105 1 8.0142836210079444e+00 3.4253500628703644e+00 1.9822510084209746e+00 1 0 -2
+274 1 4.5713678557713822e-01 5.4244335571632307e-01 3.0849174937154351e+00 1 -2 1
+314 1 1.8447059547300777e+00 1.1692544876557610e-01 3.3199499683474132e+00 2 0 -1
+408 1 3.1814603728989130e+00 7.4387930556111925e-02 3.3119410401147560e+00 -1 1 1
+142 1 5.0629800765951494e+00 4.5529434814892644e-01 2.8994044435235393e+00 2 -1 -3
+25 1 6.1307890615815195e+00 4.8707110399724851e-01 2.9156227033318936e+00 0 2 -4
+458 1 7.5149810474983081e+00 4.9072663758191898e-01 2.7754564040841219e+00 2 -4 0
+165 1 1.3117326379233891e+00 1.1343392736952256e+00 3.1076574691841947e+00 -2 2 2
+421 1 2.7633861789985827e+00 9.5478806243591052e-01 3.1957876340691875e+00 -1 -3 1
+151 1 4.7220243778881930e+00 1.4282004346636548e+00 3.2642378377455432e+00 0 0 -1
+162 1 5.6468847674841811e+00 1.3364063405496600e+00 2.5903751818453160e+00 4 -4 -2
+120 1 8.2129099591176686e+00 1.3575499019485984e+00 2.6634845511723606e+00 0 0 0
+448 1 8.8805142887530297e-01 2.2694079305496020e+00 3.1897834611367313e+00 -1 1 3
+50 1 2.9687835700409062e+00 1.9135157390691884e+00 2.8548074734572992e+00 -3 0 1
+443 1 4.0462687470728396e+00 2.3330394967344139e+00 3.1667340148022216e+00 0 1 -2
+482 1 4.9427462157614270e+00 2.1611442624383619e+00 2.6370759110090005e+00 0 3 3
+407 1 5.7753726348779013e+00 2.4435329312939671e+00 3.1756094964019836e+00 0 1 -1
+100 1 6.6610073695947598e+00 1.9476592422501362e+00 2.6046914024931409e+00 -2 -2 1
+99 1 3.5098260053604374e-01 3.2489377905726768e+00 3.1253385231740438e+00 -1 1 -1
+107 1 1.3942178307183000e+00 3.1914627339242005e+00 2.9813408440396350e+00 2 0 1
+480 1 2.3066257454835091e+00 2.6794898513136354e+00 2.7495660819550110e+00 -1 -2 -1
+115 1 3.1214377398204638e+00 2.9223083977660864e+00 3.3233156913824242e+00 1 2 1
+278 1 5.0520729931158854e+00 3.1424127338010743e+00 3.1175324674801042e+00 0 0 -1
+206 1 7.7081484276756580e+00 3.0580419350207482e+00 3.0362913717240096e+00 0 -1 -4
+4 1 1.3000434256419220e+00 4.1733384323360649e+00 2.6659576195319934e+00 1 0 2
+214 1 3.2669228431804429e+00 3.8002115979716740e+00 2.6408106015701289e+00 0 0 0
+65 1 4.0280349657536760e+00 3.3861374196314542e+00 3.2331032594455227e+00 2 1 2
+490 1 5.8571557116011554e+00 3.8735942685941813e+00 2.8613897619661586e+00 -1 -2 0
+230 1 2.4828380364158602e+00 5.5335473677805791e-01 4.1950137944148906e+00 2 -1 3
+234 1 4.2752320357614213e+00 1.4212800762204394e-01 3.7635975156407624e+00 -2 1 -4
+92 1 5.6957646578006917e+00 2.5374658514218495e-02 3.8304068113906884e+00 0 -1 0
+103 1 7.0230616578295741e+00 4.6755655210711161e-01 3.8662999072020967e+00 1 -2 1
+129 1 8.1671299320274162e+00 5.7067862198193264e-01 4.0702212708530077e+00 -1 -2 0
+483 1 3.2862260723228903e-01 1.4557716141563948e+00 3.6577409481728278e+00 -3 4 -1
+110 1 1.5047240722693447e+00 1.0396158231062973e+00 4.0843832165787424e+00 -2 -1 0
+126 1 3.7388270640303456e+00 1.1962750537841655e+00 3.4335639532998909e+00 3 0 0
+276 1 5.6087889039130960e+00 1.1358153696317190e+00 3.7428282467743927e+00 0 2 1
+294 1 6.5127919732673050e+00 1.4345197299837358e+00 3.4448769173070444e+00 -2 0 2
+417 1 7.5734653423068021e+00 1.3756634588591667e+00 3.5078056411981438e+00 -1 1 0
+102 1 1.1107713360470239e+00 2.0167473232388695e+00 4.1576658165713738e+00 -2 0 0
+228 1 2.0072330566273600e+00 1.8105875233272237e+00 3.4619908126078429e+00 2 0 -1
+2 1 3.2101113937010530e+00 2.0183489554560774e+00 3.8822820800419566e+00 0 1 -1
+222 1 4.2730523271288350e+00 1.9092752363315002e+00 4.1553216684001049e+00 1 -1 0
+332 1 6.5042309397343736e+00 2.2214607151080448e+00 4.1057034120156182e+00 0 1 3
+275 1 7.5099659540449100e+00 2.2790896507455778e+00 4.1566328430638730e+00 -1 0 -1
+243 1 8.2736142057938107e+00 2.3393256795577462e+00 3.4990413844829384e+00 0 0 -2
+394 1 5.8512440497757878e-01 2.8807282965119669e+00 3.9612614891397042e+00 1 3 1
+325 1 2.1875714466797338e+00 2.8726101452768167e+00 3.8147636089102748e+00 0 2 -1
+468 1 5.6059790271973755e+00 2.9975764588671621e+00 3.9816014902843078e+00 0 1 0
+113 1 6.5849924400139104e+00 3.1808065990344776e+00 3.4367361163256929e+00 0 0 1
+47 1 1.0081753047752569e+00 3.9556172416934308e+00 3.8502376136806813e+00 1 -1 0
+474 1 1.9694475511677239e+00 3.9381337754040748e+00 3.4201230948176762e+00 4 1 -1
+343 1 3.0370114219017328e+00 3.9465351732349148e+00 3.7436387890882474e+00 0 -2 3
+257 1 6.0639634839200358e+00 3.9545122043987475e+00 3.7922755780058259e+00 1 3 -3
+422 1 7.3939236737263112e+00 3.9358351587357783e+00 3.4932390476416106e+00 -2 -1 -3
+317 1 3.5159353631335000e+00 5.3479576975233323e-01 4.4869163379906354e+00 -1 -2 -4
+322 1 5.5733359556605757e+00 4.8698103039818835e-01 4.7704783768556309e+00 1 6 -1
+453 1 7.3567123647601029e+00 8.3432787155128474e-02 4.8780449077720851e+00 -2 3 2
+82 1 5.8224871123849942e-01 1.1896491837492689e+00 4.7267276175254507e+00 3 0 -1
+402 1 2.4848673528490579e+00 1.6438582837634208e+00 4.4663750233867230e+00 0 2 3
+201 1 3.5370299676438193e+00 1.4874347892452509e+00 4.7752714735806165e+00 0 -2 -1
+159 1 4.7745775925936407e+00 9.0511891230699659e-01 4.2787903105333260e+00 0 -2 2
+239 1 6.3670232447984816e+00 1.2120460051587267e+00 4.4601763672019308e+00 0 1 -1
+340 1 7.6240687426290750e+00 1.3432987598476136e+00 4.6741720105351368e+00 -2 1 1
+246 1 6.4819140516389595e-02 2.1918155429312383e+00 4.6033031341679633e+00 0 2 1
+268 1 5.2454607296226170e+00 2.0016193334579642e+00 4.2366338410742728e+00 -2 2 0
+238 1 1.7670560071066166e+00 3.0774892421151585e+00 4.7759677543328642e+00 0 0 0
+183 1 2.7500175098986750e+00 2.8066272725663279e+00 4.6976729622871014e+00 -2 0 0
+393 1 3.6570205199594930e+00 3.1994600133365143e+00 4.2220986255821451e+00 1 2 -1
+177 1 4.6077861627416805e+00 2.8922144055644265e+00 4.5492372075807843e+00 0 -1 0
+97 1 2.5306051464072796e+00 3.9363146816844674e+00 4.9489842130852137e+00 2 0 1
+16 1 5.1924745493168265e+00 3.7357318418579575e+00 4.8717896643693246e+00 0 -2 -1
+15 1 6.1086100243278070e+00 4.1952083415168335e+00 4.9407045230806066e+00 -1 0 -4
+467 1 6.6872865628091098e+00 3.3657369472951393e+00 4.6943068780100674e+00 1 2 2
+404 1 8.0052336596808171e+00 3.5358005023997046e+00 4.5274309747477952e+00 2 2 -1
+190 1 9.4380374464947103e-01 4.4583992222855645e-01 5.4653306453144710e+00 1 0 1
+429 1 1.8525920421435695e+00 3.5762708549602490e-01 5.0946475568663319e+00 3 0 2
+356 1 3.9097878727975877e+00 6.1716052733039095e-01 5.4734744838905147e+00 -3 1 -1
+459 1 5.0297951477539122e+00 6.4682655744305417e-01 5.6718176557063691e+00 0 1 1
+140 1 6.3845220800541655e+00 6.6200714540119532e-01 5.4418985717149972e+00 2 -1 -2
+121 1 6.3608122327251349e-01 1.6422337064097718e+00 5.7379184381145940e+00 -2 -1 -1
+76 1 2.6863089369401427e+00 9.2826845731889596e-01 5.1620994403726908e+00 -1 -1 -1
+24 1 4.4731219011860990e+00 1.5605471103773427e+00 5.2523372789875760e+00 0 -1 -1
+215 1 5.5638567276602870e+00 1.5540980050800413e+00 5.2788517723209116e+00 2 1 0
+406 1 7.1965017439495353e+00 1.2927814031197866e+00 5.6667263284723859e+00 0 2 -2
+495 1 8.1400977173034548e+00 9.1998286287268760e-01 5.5427200454151757e+00 6 1 0
+256 1 1.5858394354874155e+00 1.8927406862702940e+00 5.2157829401895270e+00 4 0 1
+348 1 2.8144079358075680e+00 2.0457885486401644e+00 5.4765038200952452e+00 1 -1 2
+18 1 3.7959072614405218e+00 2.4151932066273840e+00 5.1090004037855792e+00 0 -2 -3
+259 1 4.8058181463022738e+00 2.5068639859502841e+00 5.5173228168937900e+00 0 0 1
+261 1 5.9067727879570278e+00 2.4818772803689844e+00 5.0470051254431221e+00 0 0 -3
+433 1 6.6238775496086628e+00 2.4655070126810084e+00 5.8390299036823690e+00 -4 1 0
+119 1 7.4961811313748150e+00 2.2560858298720374e+00 5.2761246161627531e+00 0 -2 0
+181 1 8.7803165796510541e-01 2.5368621724963174e+00 5.1009676967579534e+00 3 -4 -2
+392 1 1.3563447389359846e+00 3.2004762283973394e+00 5.8237816659365569e+00 0 1 -1
+436 1 2.3946011665804514e+00 3.0278168308484168e+00 5.6997814720410966e+00 1 1 2
+210 1 5.9632298308600022e+00 3.3253508577291475e+00 5.7908436280268685e+00 0 -1 -2
+409 1 8.2954742717592467e+00 2.8784369153928258e+00 5.3508660402584347e+00 0 0 0
+437 1 7.8285811502936531e-01 3.5972247646914322e+00 5.1173576054548029e+00 -1 2 1
+114 1 3.5350435499222215e+00 3.3621891050940294e+00 5.4926280066777995e+00 0 -1 1
+431 1 7.2810459589983303e+00 4.1806346853520422e+00 5.1699487852752508e+00 0 -1 2
+296 1 2.7493301442805773e-01 9.8739001637521445e-02 6.5256735912844297e+00 2 0 -1
+477 1 2.7650505314326872e+00 5.9323041848923519e-01 6.1880566096904213e+00 -1 0 -2
+199 1 3.8449072993955937e+00 6.8021783868606100e-01 6.5252139926754094e+00 1 0 -1
+31 1 6.8196091485194961e+00 6.5962784856610523e-01 6.6694400071986637e+00 0 -2 -1
+149 1 7.6325509159351759e+00 1.3582393886225264e-01 5.8995743775761014e+00 0 1 -2
+216 1 9.7752025904362638e-01 1.4078822836813889e+00 6.6728311339234487e+00 2 0 1
+425 1 1.7005586383875275e+00 1.1280428057946255e+00 5.8993957526091192e+00 -2 5 2
+224 1 2.8675797989920913e+00 1.6348520503423174e+00 6.4743030869420952e+00 1 0 0
+352 1 3.7461129077349202e+00 1.6467836683144792e+00 5.9711819974315681e+00 0 2 1
+289 1 5.0071286046398447e+00 1.3773263732616476e+00 6.4710057868740503e+00 2 -1 2
+455 1 6.2266605172975309e+00 1.4566180649712972e+00 6.2025029141131824e+00 0 1 2
+298 1 7.2166245583854813e+00 1.5995071342823830e+00 6.6916596114412981e+00 -2 2 -2
+42 1 8.3616236989495309e+00 1.1193553859054726e+00 6.6834990220304640e+00 1 -1 -1
+80 1 1.7994758316879240e+00 2.0602535563487749e+00 6.2756102858385763e+00 1 -1 -2
+127 1 4.3098326176630284e+00 2.3959981453352577e+00 6.4129162374884245e+00 -3 2 -4
+232 1 5.4258898148163945e+00 2.3715461809208245e+00 6.2879862147301759e+00 1 1 -4
+368 1 8.2199446060636472e+00 2.1052670796140989e+00 6.3974575157449376e+00 -1 -1 -2
+486 1 6.9787179588597281e-01 2.5321347977480562e+00 6.5634477651369929e+00 1 -2 1
+209 1 1.7697882729109665e-01 3.3153947644324067e+00 6.2408024968147000e+00 -1 0 -2
+152 1 3.1591516689848946e+00 2.6297173021869003e+00 6.2970081587154914e+00 -1 3 2
+353 1 3.7775468094107074e+00 3.3545010039524996e+00 6.5652719772105916e+00 0 -2 1
+361 1 7.1568303860427589e+00 3.3402954049017803e+00 6.0487812936573579e+00 1 1 0
+391 1 1.8477885811255761e+00 3.9829271272845177e+00 6.5960694610186286e+00 2 0 -1
+464 1 2.8125899145281190e+00 3.9218837233739201e+00 6.1411786897515759e+00 -2 2 1
+497 1 4.5524588331963729e+00 3.4967999912980696e+00 5.9321819433594642e+00 1 -2 2
+498 1 2.0830532473234906e+00 1.1220036849985102e-01 7.1674325217309276e+00 3 3 -1
+264 1 3.1008573853993013e+00 7.6076625369238260e-01 7.2316131354301971e+00 1 1 2
+265 1 4.6342157774659736e+00 7.4471207241976944e-01 7.1556952519725225e+00 0 -1 1
+350 1 5.7838647346068255e+00 5.9645078928256690e-01 6.9763870948006925e+00 2 -2 0
+144 1 1.9122165150526358e+00 1.1329756208528616e+00 6.9185463020768152e+00 0 1 1
+17 1 3.7256587791934250e+00 1.6333740129131904e+00 7.1341698423474043e+00 0 0 0
+441 1 3.6115685474483350e-01 1.9101453221098965e+00 7.5303824824165133e+00 0 1 2
+266 1 1.4762191689510862e+00 2.4662450761248356e+00 7.5398542897976331e+00 -3 -2 -2
+375 1 2.3309589830172550e+00 2.0436669163885055e+00 7.1892097709618481e+00 0 1 0
+351 1 4.7143585284219309e+00 1.8434829864646978e+00 7.2780594229535307e+00 1 0 1
+254 1 5.9170265466448875e+00 1.9140788620193201e+00 7.1554204752492074e+00 0 1 0
+29 1 1.5191420610560982e-01 2.9876711650320327e+00 7.5404622831951533e+00 1 -2 0
+198 1 1.9302038195524280e+00 2.9894398797282253e+00 6.7310853131736357e+00 2 -1 -2
+197 1 3.3083005500744571e+00 2.6144599213348498e+00 7.4059907643060248e+00 1 0 2
+192 1 4.3810729888763209e+00 2.8557380429470860e+00 7.4007735378272006e+00 1 0 1
+220 1 5.1223945377780948e+00 3.3376038243952149e+00 6.7920010375935584e+00 0 0 1
+432 1 6.2458841824934863e+00 2.9482647398415351e+00 6.8353911959966540e+00 0 2 -1
+20 1 7.5153792083913968e+00 2.5459516140888447e+00 6.9548679851000408e+00 3 -1 -2
+63 1 1.0659006163620912e+00 3.4796280956085210e+00 7.1592343773906313e+00 0 1 -2
+446 1 7.1139268453604680e+00 4.0629892995134789e+00 7.0800362551664415e+00 1 -3 -2
+172 1 8.3244437231535144e+00 4.0795393733923691e+00 7.1857426258997332e+00 2 0 -1
+405 1 2.0158180111850474e-01 7.3000938410316307e-01 7.6191994505829879e+00 0 -3 0
+323 1 1.4017230037752237e+00 4.6050949606146502e-01 7.9006469000953414e+00 2 1 -1
+428 1 2.4287753885012338e+00 4.6438480937363963e-01 8.3560508824421493e+00 3 0 2
+280 1 3.6502596381287908e+00 7.8205464112991063e-01 8.2256571665042131e+00 3 1 -1
+54 1 5.2270258856938900e+00 5.3851997974995880e-01 8.1047016704593808e+00 -1 0 -5
+397 1 7.4044521194737944e+00 1.4880482327788007e-02 8.2278445496777337e+00 1 0 2
+279 1 1.2356833818326960e+00 1.5625251472088779e+00 7.9109615213928013e+00 0 4 -2
+134 1 2.5628613498785486e+00 1.5781156241932268e+00 7.9994516338459301e+00 0 1 -1
+236 1 4.5573192582583912e+00 1.4401963680745278e+00 8.1580516205465621e+00 0 3 -3
+493 1 6.1321864263285528e+00 9.9745261592364498e-01 7.9044637123188437e+00 0 3 -3
+346 1 7.4631508711766701e+00 8.7980227698742530e-01 7.6161632460447608e+00 2 -1 -1
+295 1 3.6674081936683844e+00 2.3602153939048316e+00 8.3119852957626765e+00 0 1 -2
+164 1 5.5460188609614756e+00 1.8259166833804881e+00 8.1712445391317772e+00 -2 -2 -1
+354 1 6.6872220974400829e+00 2.0300640265600358e+00 7.8561422683464270e+00 1 2 0
+449 1 7.9034553200360103e+00 1.8534144914629234e+00 8.0299472682703943e+00 -1 -3 0
+185 1 9.5448193278219684e-01 3.3578879558028460e+00 8.1917731250955708e+00 1 1 -1
+229 1 2.5099351180061946e+00 3.0097254013180406e+00 7.8296711975898541e+00 -2 2 -1
+135 1 5.5895093348111047e+00 2.7786209311728598e+00 7.5930433616327404e+00 -1 0 -2
+211 1 7.0529928021063730e+00 3.1919477500830000e+00 7.6370125321552074e+00 -2 2 1
+81 1 1.8872269150105381e+00 3.9351057043865243e+00 7.6163648141042426e+00 0 -1 1
+37 1 3.6614705509244851e+00 3.4915765263742262e+00 7.7267865376074960e+00 2 0 1
+355 1 4.5831332879360058e+00 3.8667909487066860e+00 8.3349240075214972e+00 1 -2 0
+141 1 5.4975686789962985e+00 3.7534665361901731e+00 7.8536976598839008e+00 1 3 1
+122 1 7.2999342247943373e+00 3.9738648524411286e+00 8.3865006440763370e+00 0 -1 -2
+327 1 3.2503689184711210e-01 4.6592524224991747e+00 8.2659471361477532e-01 0 -2 2
+386 1 1.1858359917362140e+00 4.2742921253409367e+00 2.8183822072239956e-01 -1 -2 5
+59 1 1.8702404230468521e+00 4.9717730076471947e+00 7.6874981917086671e-01 0 2 -3
+57 1 2.8167286953460633e+00 4.2569068178507328e+00 1.1953882254793591e-01 1 -1 1
+489 1 3.6302378764516323e+00 4.4521177967574310e+00 6.6729235313037527e-01 1 1 5
+326 1 4.9856357064009993e+00 4.5330987087989216e+00 5.2454407858907726e-01 -1 2 2
+359 1 5.9913033298564331e+00 4.3362327743167306e+00 3.7835915829420080e-01 2 -1 -1
+349 1 7.2426723526689933e+00 4.7181533524380885e+00 7.2707643667494415e-01 0 1 -2
+247 1 8.6433343008963215e-01 5.6823007167579762e+00 6.5224250416194052e-01 1 -3 2
+365 1 3.8233209759419231e+00 5.6939269324333672e+00 5.9607489711922135e-01 -1 -1 0
+382 1 5.1137068164970731e+00 5.5805604939667059e+00 1.8276059551769425e-01 -1 -2 1
+130 1 8.2482820181805554e+00 5.5968590423898927e+00 4.4653409437071251e-01 -1 -1 2
+471 1 1.7119665678072469e+00 5.8953695416344436e+00 4.5930053740843135e-02 -2 0 1
+21 1 2.5641267140454049e+00 6.3080369711057624e+00 5.3936073769936865e-01 1 -2 0
+499 1 4.7164689695839286e+00 6.6465500795405017e+00 1.6611623106065540e-01 -1 -1 1
+253 1 5.9043290983731245e+00 6.1331552003521068e+00 7.5404520106110218e-01 3 -1 0
+302 1 6.9564884663410282e+00 6.1640675729464434e+00 5.7033134187391044e-01 0 -1 -1
+324 1 8.1992983060509594e+00 6.5972265856779417e+00 7.7494747996129210e-01 1 -3 2
+154 1 1.8996589089571863e+00 7.1193783454022102e+00 5.1404963792269565e-01 -3 -1 -1
+284 1 3.0007790559640597e+00 7.2198187648711993e+00 2.3642486802169203e-02 0 0 0
+161 1 3.8133207311484072e+00 7.3201299304675667e+00 5.0202507839899535e-01 -2 -4 -2
+112 1 5.6724794675417822e+00 7.1979165692397187e+00 7.3233205047939343e-01 -1 -3 -2
+39 1 6.4178033104766934e+00 6.8891409921259603e+00 1.5315709015991064e-01 0 0 2
+269 1 7.4906317471214443e+00 6.7375050108085421e+00 4.8287801879068511e-02 3 1 0
+470 1 2.2822847309207428e-01 7.8917014242385868e+00 7.7675485207454809e-01 0 0 1
+84 1 1.3672512040847244e+00 7.9379711016796204e+00 1.6230344562139010e-02 1 0 -3
+381 1 2.6740213254054948e+00 8.0161072175846630e+00 5.8246256296805565e-01 -1 -1 1
+416 1 3.6213693754100857e+00 8.3576521057616233e+00 6.2331560400527297e-01 2 2 -1
+182 1 5.1660049409085866e+00 8.0423641799361949e+00 3.0675573970032322e-01 -2 0 -1
+347 1 6.5429973279348026e+00 7.8912499362162034e+00 4.9818784876300565e-01 -1 1 -2
+380 1 7.4990421428201417e+00 7.6756533739549013e+00 7.5184655726615468e-01 -3 0 0
+19 1 1.4643514526638863e+00 4.2228380130315317e+00 1.3843336624933156e+00 0 3 -1
+430 1 4.3057981012626918e+00 5.0291601875436083e+00 1.2152759600723888e+00 1 1 -2
+212 1 5.0477378389079224e+00 4.2727344176247888e+00 1.5457805058503180e+00 -2 -1 2
+479 1 7.9063585867776194e+00 4.9679041912501303e+00 1.6188677256489217e+00 1 3 -1
+217 1 2.5218672599222897e-01 5.8414577361250837e+00 1.5087288813722908e+00 2 1 2
+147 1 1.1545026869668784e+00 5.3345315151941692e+00 1.5114087897016817e+00 1 2 -1
+13 1 2.8751716646412713e+00 5.2058560720828826e+00 1.0454367061984737e+00 -2 1 -1
+170 1 5.1473087472466297e+00 5.7979187654554183e+00 1.4646868436401976e+00 1 2 0
+204 1 6.2817592200581442e+00 5.1972845711964553e+00 8.9956483663207842e-01 -1 -1 -1
+306 1 8.3865328816009488e-01 6.6369969836196416e+00 9.5403861420405511e-01 1 -1 3
+414 1 1.6815920771923107e+00 6.1626367729687450e+00 1.1744973520180511e+00 0 2 0
+79 1 3.3223472300560521e+00 6.2059256452802716e+00 1.3962300133956274e+00 0 0 0
+116 1 7.6213827835593815e+00 5.9109006810695996e+00 1.2032264335528675e+00 -2 -1 3
+231 1 2.7502476364406934e+00 7.3933926752228123e+00 1.4247539952768773e+00 3 -1 1
+171 1 4.7071880050572981e+00 6.8064285791523735e+00 1.2807974821888473e+00 0 -2 1
+273 1 7.0329000376929018e+00 6.8584131720717227e+00 1.6693583798609657e+00 -2 1 -1
+260 1 8.0496857982994889e+00 7.4547112671714686e+00 1.6374112984134654e+00 3 -1 -2
+43 1 1.0723712760276345e+00 8.0330657119523838e+00 1.4245919245059160e+00 -2 1 0
+208 1 4.3910624755767662e+00 7.9590156044378260e+00 1.1638330407357564e+00 -2 -2 -1
+226 1 5.5077902922207063e+00 8.0548948337249193e+00 1.2764179622713128e+00 0 0 2
+370 1 6.4142184578798505e+00 7.6748965104033502e+00 1.5118878610721513e+00 1 1 1
+85 1 4.9493682223295465e-01 4.4925581352788218e+00 1.7962077237252332e+00 2 0 0
+213 1 2.1575495906041722e+00 4.9142597626853277e+00 1.8811900528366003e+00 0 3 0
+299 1 3.3659976969419567e+00 4.3662606017243588e+00 1.7255850309353928e+00 3 -1 -2
+249 1 4.7082373488231886e+00 4.9470082900514569e+00 2.3877787475724284e+00 0 0 1
+258 1 5.7775488222451887e+00 4.8696821219175135e+00 2.2927307255193869e+00 -1 0 -1
+388 1 6.8398302583746604e+00 4.7642336931962808e+00 1.8670074890749995e+00 1 -4 -3
+91 1 7.7108281252011066e+00 4.3462922152329373e+00 2.4115614404022634e+00 2 -2 1
+415 1 1.3354264256961873e-01 5.2683808667768846e+00 2.4940378524658144e+00 0 3 0
+27 1 2.0564207334277329e+00 5.6999752852325889e+00 2.4901436865643838e+00 0 -2 -1
+465 1 3.6077777180133985e+00 5.3106225786683270e+00 2.2984375934854913e+00 1 4 2
+72 1 1.2115862831734683e+00 6.3271145377400213e+00 2.3088380865765914e+00 -1 -2 -1
+58 1 2.4730215910430200e+00 6.4108021967014599e+00 1.8872921058165837e+00 2 -2 2
+88 1 4.2468496124355051e+00 6.0000736368680991e+00 1.9689248186008255e+00 1 0 -1
+6 1 5.2824888171486872e+00 6.1177755165021708e+00 2.4648490036232666e+00 0 -3 1
+75 1 6.5540148621238368e+00 5.8837981825070473e+00 1.8049347572360399e+00 -3 0 4
+22 1 7.8390642986715990e+00 6.6390504342942007e+00 2.2941937725662940e+00 0 1 0
+128 1 5.0900588055916374e-01 6.9175984002222233e+00 1.8531623061329514e+00 -1 0 1
+193 1 1.7616411201473283e+00 7.3777929885216569e+00 1.8819221158557660e+00 -2 0 0
+389 1 3.8054852300865960e+00 7.1908689074118346e+00 1.6897648139570383e+00 1 -1 -1
+241 1 3.3304867419800805e+00 6.7201719117843943e+00 2.3881392654044515e+00 0 1 0
+14 1 5.6091745306383807e+00 6.8869789388403264e+00 1.7385824045568810e+00 0 -1 0
+307 1 2.6028552221612093e+00 7.5983757451277896e+00 2.5134014261731092e+00 -1 1 0
+401 1 3.3924470446908370e+00 8.2185878004411617e+00 1.7224400376656741e+00 -3 -2 2
+248 1 4.8372436612670020e+00 7.6440956091999794e+00 2.0947200376368853e+00 2 -3 0
+335 1 7.0823280457514208e+00 7.5905134700352388e+00 2.4694687185743178e+00 -1 0 0
+281 1 7.4000691269637358e+00 8.3890728034516613e+00 1.8965880690997261e+00 -1 -1 0
+33 1 2.6215904333511340e+00 4.6944190932346643e+00 2.8031190571610427e+00 1 1 1
+221 1 3.7745777664099962e+00 4.5666560881588492e+00 3.0592511133135365e+00 0 -1 1
+251 1 6.7715334943605834e+00 4.6401458096684580e+00 2.8510683358464712e+00 1 -2 -2
+138 1 1.1922779528936784e+00 5.1973855619084013e+00 2.5689556972218521e+00 0 1 0
+73 1 4.3331751132449305e+00 5.7904710852531096e+00 2.9618985356431899e+00 0 -2 2
+237 1 6.2300560736724488e+00 5.7475979505901522e+00 2.7248929404434143e+00 3 0 0
+77 1 7.3913602469000059e+00 5.6780386160747609e+00 2.6234944353061729e+00 -1 -1 3
+339 1 2.9546703425028309e-01 6.2132195219954411e+00 2.7867557714215443e+00 0 0 2
+69 1 1.3562182919504069e+00 6.0134582221288868e+00 3.2785943115156346e+00 0 -2 1
+150 1 2.9668876558472732e+00 5.9472670316673391e+00 2.8936146954206632e+00 3 -2 -1
+219 1 1.1608118939764323e+00 7.3228681938207032e+00 2.7126116817865631e+00 0 -3 -1
+9 1 2.0627854045350777e+00 6.7454186793237145e+00 2.9360002216641958e+00 0 -2 -2
+3 1 2.9445094497077151e+00 7.1384526732484925e+00 3.3587771406211751e+00 2 -1 -2
+93 1 3.8294061315771235e+00 7.5378392781893275e+00 2.8380550678413003e+00 -1 2 0
+68 1 4.5306997862577427e+00 6.7390306077267583e+00 2.7235217150491495e+00 1 1 3
+35 1 5.4701910601523354e+00 6.9880160912814793e+00 3.3190052755169512e+00 0 0 -2
+174 1 6.2873908725084267e+00 6.7805871501360224e+00 2.7219816088319484e+00 -1 0 2
+369 1 1.7965435829527715e-01 8.0388194202620209e+00 2.5940286664394834e+00 -3 -2 1
+469 1 4.9516921171039039e+00 7.8826764349301754e+00 3.0530014528334970e+00 0 -2 -1
+270 1 5.8608285132223106e+00 7.6946678024150144e+00 2.6778659829007898e+00 0 -1 -2
+196 1 7.6768577919393455e-01 4.8792664640921366e+00 3.5903070116215088e+00 3 -1 1
+235 1 4.2285310787746502e+00 4.2638690385284841e+00 4.1560966227762339e+00 1 1 1
+104 1 4.9767847616154004e+00 4.2048445393860883e+00 3.3962447019771562e+00 -1 -2 4
+363 1 5.8883238331527643e+00 4.9482373301020921e+00 3.4294316309792547e+00 0 0 3
+440 1 8.3695482926311691e+00 4.2129368246316421e+00 3.4018022112738615e+00 -1 2 1
+271 1 1.8521235523587734e+00 5.0753345126075313e+00 3.3807703470063983e+00 0 2 1
+385 1 3.2904276665596646e+00 5.1796485809465160e+00 3.7980367644038786e+00 -3 -1 0
+367 1 4.4116421156217953e+00 5.2602223360914149e+00 3.9453911592241386e+00 1 1 0
+8 1 5.3112651984610038e+00 5.7540585999932050e+00 3.4831459566385630e+00 3 1 0
+445 1 6.8973470747727381e+00 5.7304789961834581e+00 3.7043310817118127e+00 -2 -1 2
+95 1 8.3390772303693836e+00 5.5440620917552232e+00 3.7707087770664116e+00 -2 1 -3
+227 1 2.6907357337748437e+00 5.9773212885609901e+00 3.9377639031978284e+00 -1 -2 0
+45 1 3.9484717591019058e+00 6.4426795460159951e+00 3.6915638730283460e+00 0 1 -1
+62 1 6.2921228111843934e+00 6.5338531152740655e+00 3.8634903896998787e+00 1 0 -3
+309 1 7.4708902093515182e+00 6.5908652825436720e+00 3.6007172393059794e+00 0 -2 0
+70 1 1.1631280783050528e-01 7.0230287218135894e+00 3.4578008409773004e+00 -1 -2 2
+285 1 1.4425208756199777e+00 7.4192404188139784e+00 3.7903676842933867e+00 0 1 -1
+387 1 2.1212416378368082e+00 6.7926691438538640e+00 4.1965603113934069e+00 1 -2 -1
+484 1 4.6356188966878253e+00 7.3783035166039177e+00 4.0565969174155132e+00 0 -1 -1
+98 1 6.8865210518165370e+00 7.3140090488700187e+00 4.1918733358514384e+00 0 -2 -2
+282 1 6.6401301602074470e-01 7.9757076330778913e+00 3.4568068256572881e+00 0 0 2
+203 1 2.5721741898113022e+00 7.9369081377447577e+00 4.0156309667060688e+00 0 3 0
+189 1 3.6912207664687133e+00 7.6806375475121094e+00 3.9059717431750585e+00 0 -1 0
+267 1 6.5713653654298607e+00 7.8128923355065503e+00 3.4078724648171090e+00 -3 -3 2
+155 1 7.9603563078893247e+00 8.1399268391964696e+00 3.4070072411074963e+00 -2 -3 -1
+313 1 1.7289301235151016e+00 4.3022360027322257e+00 4.5514630316827649e+00 0 1 -2
+250 1 3.3811553057719541e+00 4.4820133651244189e+00 4.8062617116575304e+00 1 1 1
+49 1 5.2294045670482578e+00 4.6507731504297771e+00 4.3588420415784839e+00 -1 2 -1
+111 1 6.9650608689197728e+00 4.6666276346705935e+00 4.3879356756979719e+00 1 1 -1
+124 1 8.0933085752880096e+00 4.6382186065764790e+00 4.3125441053619298e+00 1 1 -1
+488 1 5.0933114935797996e-01 5.1359886989747769e+00 4.5948583401263194e+00 -3 1 -2
+48 1 1.5851307073885004e+00 5.5767207395220701e+00 4.2311661806996650e+00 2 -2 1
+123 1 2.5153837170534041e+00 5.0458650845269890e+00 4.5131929409242613e+00 -1 -1 -1
+423 1 5.8208012531410862e+00 5.4948336589690365e+00 4.5916227902397422e+00 2 1 2
+329 1 7.4993364070942770e+00 5.4866100873090584e+00 4.5194723854672301e+00 -2 -2 -2
+334 1 7.1205767430238509e-01 6.1834005542640256e+00 4.2284519502384050e+00 0 1 -1
+146 1 1.3925739327604780e+00 6.6428737862846399e+00 4.8806888882719424e+00 0 1 -2
+500 1 2.2236367185474757e+00 6.0145074663436873e+00 4.9428276989368829e+00 0 1 1
+46 1 3.3290620592760245e+00 5.9295614805296450e+00 4.9789176350871935e+00 1 -1 -3
+374 1 4.7036984817853460e+00 6.2326093994181564e+00 4.3534332596232463e+00 -2 3 2
+52 1 6.8604388860085521e+00 6.3207993870641257e+00 4.9959828655023273e+00 0 -1 -1
+466 1 8.0663702260053096e+00 6.4797694103942982e+00 4.3885769098322607e+00 0 -1 -1
+272 1 6.3857634591010626e-01 7.4306626388140797e+00 4.4685908728843007e+00 -1 0 -1
+331 1 3.1923815943971330e+00 6.8406621847783482e+00 4.3673687027144910e+00 -1 -2 0
+12 1 5.5841339297795631e+00 7.2882403606371549e+00 4.4466343965097535e+00 4 3 0
+178 1 7.5912726284672463e+00 7.0457277620611389e+00 5.0302483408547527e+00 1 -2 0
+376 1 3.5187241629329796e-01 8.3977706803073371e+00 4.6278412560289910e+00 1 -5 2
+117 1 1.3379826357324891e+00 8.3239351364630743e+00 4.2945291322530599e+00 3 0 1
+419 1 1.8896223988418746e+00 7.6393579746671740e+00 4.8168734901107717e+00 0 1 2
+379 1 3.9801436864751545e+00 7.7556819889833708e+00 4.9922836416209275e+00 -1 -2 0
+53 1 4.7488025467298804e+00 8.2113497019933241e+00 4.7088252949914393e+00 -3 0 -1
+28 1 6.4645893658779787e+00 8.2802587137451660e+00 4.5908064174413035e+00 -1 0 -2
+300 1 8.0497419370896441e+00 7.6196002867575432e+00 4.2909429383935933e+00 0 3 0
+83 1 1.5533997706565803e+00 4.2394383012204173e+00 5.5726999917011435e+00 0 0 -1
+304 1 2.5788446900543724e+00 5.0166498820006939e+00 5.6119573167673691e+00 0 -1 0
+51 1 4.2443804124641717e+00 4.2352637746580450e+00 5.1927633388220640e+00 -2 -3 1
+180 1 8.3861058182094546e+00 4.2682396578102644e+00 5.2739914655295381e+00 2 -2 0
+60 1 1.6015559581031933e+00 5.1463292330916399e+00 5.1677022459932900e+00 3 -3 0
+255 1 3.9377408204864559e+00 5.5446289337866919e+00 5.6880007643189954e+00 -1 1 -1
+336 1 4.8593163697287496e+00 5.2245778091909907e+00 5.0488368666183758e+00 0 -1 0
+74 1 5.8562368106306470e+00 5.1343487602136690e+00 5.4844996621256241e+00 -1 -1 -1
+11 1 6.8757956599703460e+00 5.1962144116573894e+00 5.2833696751528123e+00 1 -1 1
+106 1 7.8813740148695439e+00 5.1326569274050362e+00 5.5073989898629776e+00 -2 0 1
+342 1 6.9861903942379322e-01 6.2397324279420259e+00 5.5554818983126264e+00 1 2 1
+366 1 1.5648860489287066e+00 5.8898927909510999e+00 5.8373116466465662e+00 -2 1 1
+153 1 2.7314006683241487e+00 6.0747252624615822e+00 5.8658792109688322e+00 2 0 0
+384 1 3.7649825100367740e+00 6.6140228235604797e+00 5.7068894568556274e+00 3 0 -1
+444 1 5.5919895424131791e+00 6.4007885907393334e+00 5.0609268438363237e+00 0 0 1
+169 1 8.1101285040334883e+00 6.0263607659074259e+00 5.3262646023669920e+00 2 -3 2
+312 1 1.5049751572049341e-01 7.0899457593186419e+00 5.3993285616214672e+00 -4 -1 2
+131 1 2.1405833530478784e+00 6.8932472731161107e+00 5.7016479425245992e+00 -1 -2 -5
+290 1 2.9965099079972810e+00 7.1703954961127128e+00 5.3705836189910157e+00 3 2 0
+240 1 4.7341951239220323e+00 7.1100411829110195e+00 5.1242936547927025e+00 -1 -4 5
+403 1 1.1346681749692125e+00 7.6627110490212882e+00 5.5800616996766177e+00 -3 2 3
+438 1 2.6845809734218404e+00 8.2016169024289791e+00 5.0687325841194584e+00 0 1 1
+420 1 4.5890023008209200e+00 8.1137042645946345e+00 5.6817522441295134e+00 -1 -3 -3
+175 1 5.5503185230040479e+00 7.9962207684468503e+00 5.3390442156703726e+00 -1 -2 1
+305 1 6.7862386693055941e+00 7.6108734242812508e+00 5.4748415197806564e+00 -1 0 0
+341 1 7.4912815420537060e-01 4.3291768420078673e+00 6.1635194432999105e+00 -3 0 0
+291 1 3.3895079643780628e+00 4.7034241504608403e+00 6.1615817441829774e+00 -1 0 1
+372 1 4.6184126650372983e+00 4.6292373343220516e+00 6.3125880555070495e+00 1 -1 -2
+293 1 5.5228203863122962e+00 4.2760181362826257e+00 5.8887719440294752e+00 2 2 2
+461 1 6.6292898049431850e+00 4.3535966489510214e+00 5.9130834363303126e+00 0 -1 -1
+263 1 7.7259268091094802e+00 4.4631499768470926e+00 6.3337285974251110e+00 -3 0 0
+473 1 5.8703225638324330e-01 5.3945075957215893e+00 6.1889975824903258e+00 1 -2 0
+166 1 1.6857123760309740e+00 5.0889285259544463e+00 6.6009674185523135e+00 2 2 2
+132 1 6.2122393744706681e+00 5.4126385838431412e+00 6.6911128610756689e+00 -1 2 1
+32 1 7.1447545314606753e+00 5.2817185569927805e+00 6.2574893275776935e+00 -1 1 0
+475 1 3.1727782520787395e-01 6.3991068893590928e+00 6.5410008153956287e+00 -1 2 0
+283 1 3.2886765713157047e+00 6.4577609617474216e+00 6.6450211825759427e+00 -2 5 -3
+205 1 4.9096514414971573e+00 6.2573093659948649e+00 5.9509143564095108e+00 1 1 0
+158 1 6.0709604726562620e+00 6.0618280221451721e+00 5.9241072976562341e+00 2 -3 1
+1 1 6.8533858129590133e+00 6.5562074315723597e+00 6.5648993855676077e+00 -2 -1 -2
+485 1 3.8125590702509465e-01 7.4616693598979804e+00 6.3609885403345947e+00 0 0 1
+378 1 1.3081995382782592e+00 6.9194359283210023e+00 6.2966360730695206e+00 -1 1 2
+71 1 4.2456384497919455e+00 6.8188407430282236e+00 6.6601493363974615e+00 -1 -4 -4
+320 1 5.9836892193800049e+00 7.1597807912498039e+00 5.9293654675383980e+00 -1 -1 -4
+481 1 7.6691355446374123e+00 7.2329848679784536e+00 6.1548343866270958e+00 2 0 -4
+318 1 1.3774303149915195e+00 8.3818831122591373e+00 6.3319633954681702e+00 -1 3 3
+496 1 2.5904412000483283e+00 7.6460489519926949e+00 6.2257027413180230e+00 0 1 0
+319 1 3.7413059213345305e+00 7.5724262762451815e+00 6.0123092822785358e+00 -2 0 0
+338 1 5.3628717868866635e+00 8.0629328638364353e+00 6.4489666989888104e+00 0 0 2
+358 1 6.2532288322758314e+00 8.3160028104100370e+00 6.1106737733333700e+00 1 1 3
+225 1 7.1188694406828850e+00 8.0966599202958953e+00 6.7037213079900431e+00 0 0 1
+55 1 1.0372861368831470e+00 4.4696141893250934e+00 7.4049774120079697e+00 -1 2 -1
+362 1 2.7501402171762614e+00 4.4083355662417896e+00 7.3996418874276850e+00 1 -1 0
+36 1 3.7396318187704796e+00 4.4801126663955504e+00 7.1074085083223073e+00 2 0 -2
+133 1 4.7860445539287522e+00 4.2902205437459102e+00 7.3711360387569274e+00 -1 -3 -2
+412 1 5.8687821857033606e+00 4.2035753083199188e+00 6.8227572622514323e+00 -4 3 0
+89 1 9.1212528791654535e-02 5.3554487915969240e+00 7.1088625783964092e+00 0 -2 0
+38 1 1.2571311964388701e+00 5.7525434192639207e+00 7.3210178579367868e+00 -1 -5 -2
+337 1 2.7511266822038212e+00 5.4161467707559394e+00 6.9980055112843935e+00 1 -2 -1
+233 1 3.8756258886529467e+00 5.7508116329978627e+00 7.0533607828281815e+00 0 0 -1
+442 1 4.9084237493694953e+00 5.5326539633780083e+00 6.7666717819135362e+00 -4 -4 1
+143 1 5.5776680237268401e+00 5.1621486864820874e+00 7.4635021710959082e+00 1 2 -2
+194 1 7.1453664245542576e+00 5.0984438205910134e+00 7.3333812213368574e+00 0 2 0
+308 1 7.5489760640996861e+00 5.8433222820659871e+00 6.9295207313931844e+00 0 0 0
+286 1 2.1326326019163346e+00 6.2556170719743678e+00 6.7379074391662934e+00 2 0 -2
+61 1 5.6654689088882648e+00 6.2935208603774795e+00 6.9534505775907345e+00 -2 1 -1
+426 1 7.2244687144599562e-01 7.0806885112202815e+00 7.2261712082790313e+00 -1 1 -2
+450 1 1.7213783340257280e+00 7.5184636193370302e+00 6.9767252413839369e+00 4 1 1
+188 1 2.8213470931093725e+00 7.2342621084044723e+00 7.1806231926796817e+00 0 -2 -1
+94 1 3.8026924767351562e+00 7.3932541518378949e+00 7.4596688811177305e+00 -1 2 -2
+252 1 5.1481924139065534e+00 7.2239227196376481e+00 7.0808756518162852e+00 -2 1 -1
+321 1 6.2155610611385042e+00 7.5199920009866501e+00 7.0964075868161212e+00 1 -1 -3
+156 1 7.9944514874782193e+00 7.0252703915440122e+00 7.2541499279788821e+00 -2 1 0
+167 1 5.6917482122629082e-01 8.1417190335213725e+00 7.4962901784513347e+00 2 -2 -1
+330 1 3.3138446283251373e+00 8.2146300363312132e+00 6.8886408760800295e+00 1 -2 0
+195 1 4.3712446295554876e+00 7.8995454090647614e+00 6.7366600234979277e+00 -3 -1 -1
+78 1 5.1856313345157918e+00 8.2127013524100576e+00 7.4620544007464273e+00 0 -1 1
+157 1 7.9011697675935482e+00 8.1764884669087454e+00 7.4266272279794077e+00 -1 0 -1
+373 1 6.7132530681439118e-01 5.0292807223694753e+00 8.1874843860841438e+00 1 2 -2
+56 1 1.7078519119356448e+00 4.9344764637762015e+00 8.1280520422148399e+00 0 2 0
+315 1 3.6234275175752244e+00 4.9451657793378052e+00 8.0476764469560074e+00 -1 -2 0
+478 1 6.4485922344872577e+00 4.3096951707056732e+00 7.7984645181472576e+00 -3 -1 1
+109 1 8.0579925616842267e+00 4.7034410927383776e+00 8.0870676538969590e+00 -1 -2 0
+357 1 2.6884169416542560e+00 5.4427652520296803e+00 8.2033924998655703e+00 0 3 1
+454 1 4.5709826050185614e+00 5.3780110248528246e+00 7.7764192595368709e+00 1 -1 2
+86 1 6.2280039286639228e+00 5.4780735688869227e+00 8.2809905303020894e+00 -1 1 0
+262 1 7.2349855745224341e+00 5.4460551342647063e+00 8.3066978094955939e+00 0 -2 2
+160 1 6.0572932237795518e-01 6.1031638741150802e+00 8.0606222325454144e+00 2 -1 1
+168 1 2.6926304594044645e+00 6.3106981827881823e+00 7.6217004925278191e+00 -1 -3 0
+145 1 3.6817206885980216e+00 6.4794531849637744e+00 8.1206460236595284e+00 -1 -1 -2
+400 1 4.6165015313151514e+00 6.4768264075412638e+00 7.5742703650137084e+00 1 -2 -2
+395 1 5.5951136272170219e+00 6.3298964082146654e+00 7.9312370644766386e+00 0 0 -1
+297 1 6.6597873155694378e+00 6.3692836044578955e+00 7.6364845321455510e+00 1 -4 0
+207 1 8.0115395128671594e+00 5.9375044868831406e+00 7.8466324008781454e+00 -1 1 3
+460 1 6.6754980612335257e-01 7.0521823363262239e+00 8.3451452216672077e+00 -2 1 -3
+44 1 1.5985553198249884e+00 6.8250382119954756e+00 7.7469635466673052e+00 0 1 0
+377 1 4.7397084186898368e+00 7.4631435688157008e+00 8.0338331164055763e+00 0 -1 0
+411 1 5.8132963527118724e+00 7.4646663474419555e+00 8.0261071780587709e+00 2 -1 1
+396 1 7.0242838781762282e+00 7.3479893628886632e+00 7.6603703429086574e+00 2 3 0
+424 1 8.1516255787718581e+00 7.5530513248894016e+00 8.2349155971796755e+00 -2 0 0
+66 1 2.2362583327280716e+00 7.7449451931440727e+00 7.8847519662235594e+00 -1 -2 0
+371 1 3.1656541571472099e+00 8.2451918974189500e+00 8.0415029551628514e+00 1 -1 1
+186 1 4.1675976318493086e+00 8.3247658596031009e+00 7.8802494669419030e+00 1 -1 -1
+413 1 6.4945159878598346e+00 8.3677431053238660e+00 7.6981134933049891e+00 -2 -1 -2
+
+Velocities
+
+7 -1.5197672199477208e+00 -7.1031250708487148e-01 -4.0950627961412567e-01
+398 9.4644999179644840e-01 -8.0422358764146151e-01 -1.1023964746841350e+00
+173 4.8084218242595870e-03 -1.9383808513915850e+00 9.3439099328992314e-01
+462 -1.6390116924948674e+00 -1.2080683544699562e+00 7.6685759600965364e-01
+383 1.2945118204202577e+00 -1.8370931432093225e+00 1.1758972656982776e-01
+288 1.2113220352829337e+00 4.8786819623543370e-01 7.1439798712590263e-01
+303 3.6232856211831610e-01 -3.7927260062054236e-01 -3.0467891118546553e-01
+491 1.6817821327211264e+00 6.3153250223994289e-02 -5.4827183875981267e-01
+187 5.6164532616475686e-02 1.3250338697636479e+00 1.6291294556417517e+00
+163 1.3950518510134631e+00 -1.7090097072259662e+00 -6.0029200570353536e-01
+345 -8.4635427449528189e-01 1.3013642733988193e-01 -3.7582537476409439e-01
+447 -5.5306523194731427e-01 -1.4583063079290524e+00 4.8086237208854654e-01
+108 -3.1879632824306825e-01 -4.9387838912491844e-01 -3.1046215530949173e-01
+427 2.4456665882970424e-01 1.3737057789182878e+00 -1.7474218101951866e-01
+96 -2.8217287015277487e-01 3.9138367367476778e-01 -4.6114700958040086e-02
+494 3.0303709765292064e-01 -9.2861341821366045e-02 -3.6692009603190118e-01
+360 -4.2709052063514102e-01 -1.5167804301477015e+00 -2.6902074926466563e-01
+179 -1.2403873183879999e+00 1.1368400311662936e+00 5.6653449328883521e-01
+136 -1.5859168963178476e+00 1.0728794555657983e+00 5.0876417522892092e-01
+311 -1.4232560247874224e-01 -8.2802934475265932e-01 1.0289469027582252e+00
+242 -1.3753994053365912e+00 3.0823812109914095e-01 1.2519475129774249e-01
+451 1.0684545083916284e-01 -1.3647250068831975e+00 1.2292458701523420e-01
+118 6.5344809858683794e-01 -6.0529931820910909e-01 -6.6821572295625731e-01
+200 -9.1497459004230530e-01 -7.2783581485393822e-01 -8.5265666689286446e-01
+23 1.0121027757280427e+00 -5.8690115135784995e-02 -1.4283745590726701e-01
+30 -1.2406061152582815e+00 3.7490020292637088e-01 -1.4457935973251330e+00
+26 -9.9581994312158117e-01 -1.2441026541932486e-01 -3.6541440712112250e-01
+452 -9.8588039362152369e-01 1.0706468502992672e+00 1.6282162085617651e+00
+439 -1.2386761163593085e+00 4.3106305328993799e-01 5.0410115305724934e-01
+492 2.7094066326642935e-01 6.6318883302565457e-01 1.5999014287193769e-01
+287 -1.7254693201510321e-01 1.1189096817152451e-01 4.1489954379924110e-01
+87 -1.1416496451717975e+00 -7.8732701356097490e-01 -5.1553483379997289e-01
+191 1.2771897324335735e+00 -1.7496443253093807e+00 5.0507874089360993e-01
+316 9.1440987552186193e-01 5.4700363415289210e-01 9.8622452705158947e-01
+176 1.8575029150961730e+00 4.1139607157872565e-01 -7.4590595351664335e-01
+244 -4.6943526746297531e-01 -4.0598167955478592e-01 1.2148844546685273e+00
+463 -6.0334643189530614e-02 9.0710610807033173e-02 -1.1159129056326442e+00
+101 -4.6185710842519001e-01 -1.0745733134156132e+00 -8.4095960640252343e-01
+456 7.3559492687866046e-01 8.4585098605759870e-02 9.9419285892741016e-02
+90 -6.9201778873218364e-02 4.2798282427303436e-01 4.4566581747153294e-01
+457 -2.4132420839469965e-01 8.4836564798364844e-01 1.0955942184355154e+00
+344 6.5538410556645377e-01 1.0527852545027427e+00 6.8154568558028386e-02
+399 -8.3144103937947644e-01 1.9062875610231711e-01 -5.6419101062513155e-01
+472 7.2667243545881360e-02 4.9535613585667015e-01 8.3691870718219108e-01
+364 -2.9526170592863293e-01 -8.7862728527604539e-01 8.2689159018968139e-01
+333 1.7241045239522232e-01 1.0435495829586168e-01 -4.6109306717747689e-01
+223 3.7919285550573256e-01 -1.0672385659469772e+00 3.6984147519214111e-01
+277 -8.8028901289995187e-01 5.8075379122029680e-01 -5.0555689035944396e-01
+487 1.5229361655153211e+00 -4.3334638823410909e-01 -5.2025315373818937e-01
+202 1.1890248672755632e+00 -1.3103428543183313e+00 1.3877359082264291e+00
+139 -3.8593619396281981e-01 -4.1419081865118412e-01 4.8341157555249414e-01
+218 -1.2190042149414249e+00 8.6603581070857749e-01 5.3180226853912571e-01
+10 4.0346230429792890e-01 8.8022045895227152e-01 3.0611723210999553e-01
+64 1.0687583963535543e+00 -2.5590555538042992e-01 8.2852453062456832e-01
+40 -1.0543471069223134e+00 -7.9810717548135568e-01 1.4389073785681245e-01
+328 3.2979243925573831e-02 3.1076559402174242e-01 -1.6410904905031269e+00
+292 -6.3224783054646483e-01 -4.3704038413109603e-01 -2.2759444134328080e+00
+245 -4.7869651996764956e-01 1.4231085014444791e-01 -1.0109564296885869e+00
+148 -7.1011989912373741e-01 -6.8437374624235792e-01 -4.7084060341785217e-01
+435 -2.3710180935788738e-01 7.3150957873543554e-01 1.9569288343148944e-01
+301 -7.3626400316656140e-02 -1.2518892197053937e+00 6.4938262930099341e-02
+476 4.4522311443074564e-01 8.4413304640745324e-01 6.9180275295562699e-01
+310 -4.4579106235383259e-01 -1.4870237388818175e-01 -1.7519046471808755e+00
+137 1.0191269790481958e+00 -1.1871217736355175e+00 6.0687187776921336e-01
+434 -8.0977924479936902e-01 8.2445677967490594e-01 -7.2982619204505017e-01
+184 9.3329968718567657e-01 -7.8960961760367476e-01 -3.2387020805541877e-01
+5 -7.6591643318776403e-01 4.4567507028100856e-01 -5.1504861374485367e-01
+41 -6.0827814801341495e-01 1.0335818250945215e+00 -1.4262577781586459e+00
+410 5.1617643329833041e-01 -1.2505433325731774e-01 6.5548767709779598e-01
+418 7.4452116863445517e-01 5.5176595694257291e-01 4.8433486134467341e-01
+34 -1.0960797951083094e+00 -9.5559899011366922e-01 4.1467430632337771e-01
+67 -1.4762168791195980e+00 7.3902009632630203e-02 -9.8563314451155759e-01
+390 1.1040922756330935e+00 -1.0608530270906485e-01 -8.5480860400114822e-01
+125 1.1001683944330445e+00 2.5213959563273414e-01 -7.0907876564322847e-01
+105 9.3642884637919491e-02 3.2241122070134182e-01 3.4267503174724429e-01
+274 1.4805711826308454e+00 1.9591264275732104e-01 4.6208005550476272e-01
+314 3.3724005635036658e-01 -4.9228354871213487e-01 -5.9105855919176131e-01
+408 4.1789097671328662e-01 2.2054753884758851e-01 9.9409268338014201e-01
+142 1.3529751978153088e+00 8.8819247946851643e-01 -2.9699787225611667e-02
+25 -3.6747378202427655e-01 -1.6653943785151797e+00 2.1594786196965424e-01
+458 -7.3004682150577682e-01 -2.5761460079892318e-01 -7.5492049253862104e-01
+165 -2.6776300065898789e-01 -3.1428309412647404e-02 7.6464836175889062e-01
+421 -2.2112566002008222e-01 -8.0033416193571816e-01 7.4057822704117926e-01
+151 1.6510010513742961e+00 6.2880202489514836e-01 1.3109463507573997e+00
+162 8.6466591751463506e-01 1.2003096922919751e+00 1.6720745976940152e+00
+120 7.1544273833415639e-01 7.8605685992147967e-01 1.5756692842267452e+00
+448 9.4818399052933544e-01 2.0373333697562079e-01 1.1821449360296239e-01
+50 -7.4255028295716818e-01 3.2131856627302258e-01 7.0344624031236247e-01
+443 5.1965861871733798e-01 -1.0469002555669342e+00 4.3373559100868195e-02
+482 1.9661775056161916e+00 1.8911734358361203e+00 7.2949567046678088e-01
+407 1.8582263860937767e-01 -1.3051257268383373e-01 5.0987931207045945e-01
+100 -3.7775697318477114e-01 6.0919017017007682e-01 7.1809182324625254e-01
+99 -1.2684617496938173e+00 7.3671717137305515e-01 -8.1216571664258830e-01
+107 -2.8776644836089277e-01 5.0874100972210130e-02 -4.8277793530428115e-01
+480 -1.8051331679926971e-01 8.6563042329278228e-01 -5.8352716569019136e-01
+115 -1.0589943552297505e+00 -5.2550981762149152e-01 -1.7244688387853080e-01
+278 1.5768829958501078e+00 1.4428098110470169e+00 8.8216722790329116e-01
+206 -1.3653407453215169e+00 5.7341428096126357e-01 2.2629859809279146e-01
+4 4.2054817038258185e-01 -2.9390858515089308e-01 -1.7875237254421117e+00
+214 -1.0426854223671351e+00 -9.9898728143962490e-01 -1.9850479848636837e-01
+65 4.1792586017239108e-01 8.0186431881714215e-01 -2.0107060384389590e+00
+490 6.7355251805377303e-01 -2.0698786089395388e-01 -3.1564646838967253e-01
+230 -1.3855973935201399e-01 1.6134941051796734e+00 2.3723214232474421e-01
+234 1.9442134326447673e+00 -1.3805634063343805e-01 -9.8210270774821284e-01
+92 1.8063255781217027e-01 9.3716955232457000e-01 -1.9187798219061411e-01
+103 3.9559605419305116e-01 -8.7523159162324915e-01 -8.6993815784441930e-01
+129 1.2253789920819727e+00 2.8171983713246046e-01 -1.2180024131333584e+00
+483 9.6349992375661320e-01 -1.6348662648160852e+00 8.3712630375667085e-01
+110 8.0468512955471783e-01 -1.2623668571621947e+00 1.4617362498240638e+00
+126 -2.5741602043303030e-01 -2.5870568275485217e-01 -1.0282926023825125e+00
+276 7.0448925295125497e-01 -1.4486777483518087e-01 4.0589927727544634e-01
+294 4.9277474123671205e-01 1.0788251906341033e+00 1.0720476037391911e+00
+417 1.2134126298835348e+00 6.9341902173441838e-01 -6.5915650495314448e-01
+102 -9.4530772564542864e-01 3.5633646308084904e-01 2.1645216652503570e-02
+228 -3.1477177829390662e-01 -7.3015136189998553e-02 -1.3550661516185267e+00
+2 -1.5167590438599203e+00 3.2942906513575698e-01 -3.9430403043809092e-01
+222 -3.4853423639550507e-01 4.3402827204271471e-01 -8.9055344775375656e-01
+332 7.6671266929186610e-01 -4.2789189576138797e-01 2.2589706911206284e-01
+275 1.0091776783462640e+00 -2.3875723103898955e-01 9.0625273137588058e-01
+243 -2.4008164703199580e-01 2.4602229249214927e-01 -9.2542387373274426e-01
+394 -1.5496683613302659e-01 -1.4731622236607285e+00 4.3712483588631723e-02
+325 -9.9716991278750156e-01 9.0079916710328489e-01 6.1656781470423368e-02
+468 1.1367478364774437e+00 3.7268550263787792e-01 -1.9334998928228118e+00
+113 -6.1637943298580211e-01 -1.4259739417714792e-01 8.5880390205968704e-01
+47 1.4522907038923785e+00 -3.8138661441744320e-01 8.8902241399148096e-01
+474 -6.5326939744671464e-01 -2.9584216080544323e-01 -1.6703076347144621e-01
+343 -2.0865957540293453e-01 4.2320136734663144e-01 -5.2417372636564219e-01
+257 -1.6598412047016537e+00 3.9614126419385248e-01 6.2190463287430520e-01
+422 9.8621254947118736e-02 -4.5653326655140536e-01 -1.6959752095102018e-01
+317 8.1554926779727022e-01 -9.3952932101149422e-01 4.1213701483558629e-01
+322 9.7692194337623842e-02 7.2221211185210599e-03 1.4983319825497168e+00
+453 1.0112117753398511e+00 -1.6043121320019077e-01 -1.5109389287188097e+00
+82 -1.8245270960375951e-01 -1.7227940931260219e-01 9.4914460423273084e-01
+402 -1.3974163926710530e+00 1.0238276956317611e+00 -2.7116721725643461e-01
+201 3.8154565319070316e-01 6.1162789639306392e-01 5.3616806651343329e-01
+159 -5.5623509138788352e-01 -7.3506001159846490e-01 2.9669217335717424e-01
+239 -1.1703971851987414e+00 -2.5624748584715568e-01 1.9987978519039629e-01
+340 -1.1651635328367398e+00 1.4923285973929103e+00 -5.0638764373602618e-01
+246 4.6916366638116123e-02 3.7471219463751682e-01 -8.5595777185358435e-01
+268 3.9776335718715750e-01 -4.1326407010466332e-01 6.4132644428253849e-01
+238 -1.0102989663900468e+00 2.5145560602393102e-01 1.6537809300909093e-01
+183 9.6649446422314420e-02 -3.1320299584600791e-01 8.9787784698607254e-01
+393 -8.1709715097020719e-01 6.5173585509788723e-01 1.3340272254613436e-01
+177 9.2862946550951486e-01 -7.4734865012317631e-01 2.6370187436425798e-02
+97 4.0324698775550633e-01 -6.5060630846020162e-01 4.3381661619662121e-01
+16 9.2225588537666714e-01 5.0761227093429218e-01 2.8224038915514177e-01
+15 7.4631799122735776e-01 -5.1735298399747642e-01 1.0267699639319139e+00
+467 2.9233724948217432e-01 -1.7768548355681282e-01 9.7602334864839035e-01
+404 -1.5591392530118275e+00 6.9331308976828065e-01 -1.9156738913640683e-02
+190 -2.3293475783192552e-01 1.9804103372721136e+00 1.4400561196029799e-01
+429 -3.1240905856412571e-01 5.3399975662794008e-01 -8.3672459106253427e-01
+356 1.1813499076138427e-03 4.5712322524667531e-01 7.2569550209608535e-02
+459 -8.2536401218028255e-01 -5.1587534016495484e-01 -3.2429633054579987e-01
+140 1.4169132709847332e+00 -1.6948863471126016e-01 -5.4590188862129563e-01
+121 -8.0623019357608017e-02 4.3956025735145637e-01 -4.1230464603429395e-01
+76 1.2149513180720075e+00 3.0664283974167705e-01 -6.2206166881984115e-01
+24 -9.0953792569391767e-01 2.7208504534230199e-01 -5.7968255809907288e-01
+215 -3.6635366137979941e-01 4.9083544993599626e-01 5.4540399677247620e-01
+406 -2.8711421371139068e-01 9.8028410382686626e-01 -1.5688211456737056e+00
+495 -2.6064405757207798e-01 1.0014422650657693e+00 1.2195244915445839e-01
+256 5.1502433217852350e-01 2.1618101498582329e+00 -5.5671498574760148e-01
+348 1.5303758850601952e+00 1.3538700733088898e+00 -1.0404602147830582e+00
+18 8.7092721144608365e-01 8.8296303804909981e-01 4.7076145062566271e-01
+259 1.2579194534410862e+00 1.1275351076124240e+00 1.3255628670657002e-01
+261 -1.9457509303164215e+00 -5.7013962206489510e-01 6.5787967104563000e-02
+433 9.5143226323949670e-01 -1.9787864106930605e-01 2.1852616176467912e+00
+119 5.2387728877999162e-01 -7.5008900728698324e-01 1.0156553043596699e+00
+181 3.6547537534957092e-01 4.4856602347738611e-01 1.9502787843157898e-02
+392 1.8883706922750969e+00 1.0836617564317459e+00 6.8587165903540392e-01
+436 -7.5639783907079583e-01 -6.1811736982444421e-01 -2.2826839230281426e-01
+210 -4.5965909404278121e-01 -3.3285948977797364e-01 -2.8815683118847368e-01
+409 -8.9630394160430427e-01 1.2498156320615575e-01 -5.9486610361558201e-01
+437 -1.3458244785393264e-01 1.1611540390359226e-01 3.6707571520967769e-01
+114 -1.1780626363780731e+00 1.1424256338126497e+00 8.1898562672334474e-01
+431 3.9295632541408759e-01 -5.0466339464883803e-01 7.4701651109545830e-01
+296 -2.9562979314792992e-01 9.3794387166245374e-01 4.3821394647736267e-01
+477 -1.0332993738194893e+00 5.9732959339716152e-01 -1.6990304505644747e+00
+199 -1.0627414724922077e+00 3.6527741326574337e-01 -5.0076911469807539e-01
+31 1.1836258822972918e+00 -1.1805527709262840e+00 -1.0148440310730218e-01
+149 -9.7152762187442065e-01 -8.2689424175864179e-01 -9.8308725322137946e-01
+216 -2.6635793686817422e-01 -1.1249304826477751e+00 5.2675272099922754e-01
+425 -2.8287490221194594e-01 4.1086346332975782e-01 -5.6774387340250076e-01
+224 -6.3636066584070272e-01 3.8927738472590379e-01 -1.2043299828812322e+00
+352 -6.1634031766701480e-01 1.9344774717790295e-01 7.8374630494614250e-01
+289 -9.8440193523267117e-01 1.4471012647779624e+00 4.3922631035860088e-01
+455 -1.1942412759322272e+00 1.1300600257912083e+00 -4.7958784865825908e-02
+298 -8.4955866984285322e-01 1.8131854984076790e-01 -1.9010434504555735e+00
+42 1.1523609145851870e+00 1.3198909771653919e+00 -1.1943568765046571e+00
+80 2.0928302112510155e+00 -1.9281727715628849e-01 3.5879525261694012e-01
+127 1.0275071129063864e+00 -7.8560029105042173e-01 8.4680312545585235e-01
+232 3.2836195573764693e-01 1.5921754951459595e+00 -1.4889040346123832e+00
+368 -1.0143580013676627e+00 -6.3286772474382025e-01 5.9003271203631313e-01
+486 3.0052735920053437e-02 -7.5917073216074904e-01 -4.0707609210832352e-01
+209 8.3084043466989521e-01 -1.2819860426930157e-01 9.8450910112172252e-01
+152 1.4018503697498572e+00 -1.3364969077798206e+00 5.1589841760235022e-01
+353 5.3822763779176630e-01 -4.8322987719532096e-01 3.5634065895494110e-01
+361 6.8264929410893807e-02 -1.3682995245096907e+00 -4.9273501999153174e-01
+391 6.7065636846264776e-01 -2.7904582963633018e-01 -6.5303896626046076e-01
+464 7.4567031841775322e-01 1.8046977002555891e+00 -3.9159771733015164e-01
+497 -1.3856699033357778e+00 -9.3599081481125368e-01 1.8204759707532270e-01
+498 1.0568629143757213e+00 -6.8137997517450560e-02 -3.7037143049187105e-01
+264 -7.7894167957263005e-01 2.7383401106980504e-01 7.5098074183721675e-01
+265 1.9614235016832940e-01 -4.2322665725562197e-01 -1.6236881765469688e-01
+350 -6.3256317352651559e-01 -5.9660876051471468e-02 5.2317095757618959e-01
+144 -2.0800773378554721e-01 -1.5156540919629388e-01 -1.8814379829548375e-01
+17 8.2103760580927843e-01 -1.1053143150308307e+00 1.1843881143235824e-01
+441 9.9191589104231870e-01 -1.9479579195707514e-01 1.8130903870970541e+00
+266 -9.4957121369000017e-02 8.6997989048119007e-02 -1.5903074475142429e-01
+375 8.1973242957273296e-01 1.0075441532082390e+00 6.6277118207442265e-01
+351 -1.0694047025049973e+00 1.2185457298237026e-01 7.2731979237547140e-01
+254 -1.4178061472233576e+00 1.0044925984667965e+00 5.9999609355847139e-01
+29 4.7548776272319854e-01 1.5001558567759110e-01 7.4645778473548241e-01
+198 5.7218364285113010e-01 2.9196622753924861e-01 -1.2231490460337893e+00
+197 -1.4204377171882598e-01 2.1322055429678972e+00 3.5360172437812271e-01
+192 -4.3230000337254881e-01 8.1753260459826260e-01 -1.3961239427992276e+00
+220 9.2338173032465304e-02 7.2444278115958327e-01 4.3240465996750244e-02
+432 3.3837153168712325e-01 1.9757543957217949e-01 -3.2138599665961831e-01
+20 7.2972771576502227e-01 -7.1923547937920451e-01 3.8462300862783037e-02
+63 7.3459759072459618e-01 -2.1393390343408181e-01 1.4606085672716668e-01
+446 9.5489174587921177e-01 -2.2284752482645742e-01 -7.3147917056258960e-01
+172 -2.3233356067616504e+00 5.9696639792861073e-01 1.0311292046090428e+00
+405 -1.1779705391325133e+00 2.7846640825986735e-01 9.6440591359561623e-01
+323 1.2758508066094825e+00 1.1576865878083721e-02 1.3415986450967126e+00
+428 4.8021126796596170e-01 4.8003440312842205e-01 -4.1979524804611819e-02
+280 1.1723141734791545e+00 8.4290272518839332e-02 1.1240738308596074e+00
+54 8.2234236592018339e-02 6.9377897211693754e-02 4.6415665343815465e-01
+397 -2.4516753222359439e-01 -3.8647076314136586e-01 -3.1938187998240442e-01
+279 4.7475740365710301e-01 4.2033019401821775e-02 1.0791025172609856e+00
+134 4.6986570792330234e-01 7.5759398359663410e-01 1.3748348906115773e+00
+236 -1.4338534270949166e+00 6.5242359063738475e-01 -4.6879001422000049e-01
+493 -9.5795155221589912e-01 -9.3131490174346532e-01 5.2848598027407112e-01
+346 9.4017276609564226e-01 -1.4423054432512394e+00 -3.4883627652283825e-02
+295 -1.0211326704350645e+00 -1.5688283678435537e+00 2.2737881805456114e-01
+164 4.2933435120637192e-01 1.2908919636558716e+00 -1.8559649323965055e-01
+354 3.1232180266110654e-01 -3.3884174823706786e-01 2.0012570054806617e-01
+449 -4.1956545717722271e-01 8.5033185910749320e-01 -1.6923286514167504e+00
+185 -1.9847768794898153e-02 1.6093024328157031e+00 -3.4278049008935008e-01
+229 3.3354615877618454e-01 -2.1967770508754222e-01 -6.8288738508757751e-01
+135 -3.8598562184690355e-01 2.3459455181853697e-01 1.4591795655915993e-01
+211 -1.1858788850370010e+00 1.3960363241794649e+00 1.1390210005059993e+00
+81 1.3604301094625607e-02 1.2711357052370638e+00 1.3840699383718491e+00
+37 3.5025606454523878e-01 -1.0316562604143946e+00 2.4944393037279400e+00
+355 -4.2713554752038259e-02 1.6441819966987897e-01 -8.2550044716409909e-01
+141 8.1569462514471880e-01 -6.8558748209733145e-01 7.0575287639668483e-01
+122 3.6274405429300222e-01 -4.1985688325391463e-01 2.5223429679090231e-01
+327 5.2689568302324030e-01 -7.1530570005039407e-01 8.1591540939993761e-03
+386 5.2540176252260506e-01 -8.0891002493580211e-01 -2.1080798818740659e-01
+59 3.8008676468324715e-01 1.2302042558959829e+00 -1.2893140123247515e+00
+57 -1.2756375464672689e+00 7.7885421584605408e-01 9.3218921154320622e-01
+489 -4.8772350194378145e-02 1.1774319824237183e+00 -1.3413963962772402e+00
+326 -8.2073133946066312e-01 -1.2685275916429064e+00 -1.0253535003668284e+00
+359 -1.8991447869255085e+00 -1.2154594584519982e+00 -1.7832247863578221e-01
+349 6.4100123949028309e-02 -3.2723074072385872e-01 -1.5565659217165477e+00
+247 4.5734576435732799e-01 4.9511064545143885e-01 -1.5945768728559153e+00
+365 -5.7917383718454207e-01 -2.3494837692508375e+00 -1.5004840205862926e+00
+382 -1.0813649655944142e+00 4.1831946020927230e-01 2.9306887264755938e-01
+130 -5.7488571729114812e-01 -9.6638702807104448e-01 -1.0278487271582966e+00
+471 1.1085411185196428e+00 -5.2404573748396321e-01 9.9734211160337760e-01
+21 -2.3901903994255250e-01 9.3578484419611557e-02 -7.3719660959071831e-01
+499 -4.9906801820206043e-02 4.0499214176262377e-01 -2.9371307883695258e-01
+253 -7.8928864460108283e-01 3.3838305995790624e-01 7.1275694444227405e-02
+302 1.5254343639867396e-01 -2.7881674166920268e-02 9.3627888140226589e-01
+324 -1.1159492478288651e+00 7.0632546920242867e-01 2.5394407536700253e-01
+154 4.8736880051344117e-01 -1.3177717741071062e+00 -7.9660620460177323e-01
+284 1.7716036485389025e-01 -3.6539348914929748e-01 -4.6941135291661629e-01
+161 -2.7479275242316997e-01 7.6982156884799513e-01 -5.7088165723862649e-01
+112 2.7023059482372042e-01 2.0487127767495705e-01 -4.9350658197380731e-01
+39 -1.6840134362063963e-01 -2.0596641915022665e-01 1.2622422574022685e+00
+269 -1.4448376822412348e-01 4.2047192520573229e-01 -1.5751711248887881e+00
+470 -9.0198936075875380e-01 -1.0035762705605118e-01 1.1469701649179411e+00
+84 -3.7165102348228596e-01 1.5718083932531206e+00 1.8715295090631173e-01
+381 6.6740577340943863e-01 -1.9432536761932276e-01 -1.2757754863002870e+00
+416 7.2118119300841677e-01 8.6813375765446710e-01 -8.2350198387929519e-01
+182 3.3694648494832763e-01 -6.5830649973844163e-02 7.0076361524788766e-01
+347 -2.4949061953800500e-01 -1.1432286442922379e+00 -4.5757988288631407e-01
+380 -4.7143472726668367e-01 -2.9180051952103941e-01 6.0581680882348110e-01
+19 7.3007216948068054e-02 9.4947888518620716e-01 3.4627576178512598e-01
+430 -2.2320112834958328e-01 -3.8559102989803251e-01 1.0427665572858709e+00
+212 2.0170777345036919e-01 -9.9332505765821755e-01 1.0550787824425254e+00
+479 -7.7910620356048310e-01 -3.4759422268929079e-02 -5.8756871125828891e-01
+217 -7.0999315500035393e-04 -1.4874035841198494e-01 -1.1389898361525996e+00
+147 3.3953282916732841e-01 1.0503362153417393e+00 8.8554338624429496e-01
+13 1.4952012032076418e+00 -1.3492024227588162e-01 8.6834761707295621e-02
+170 -7.5230462228731629e-02 1.9875829136564169e-01 3.8512887113920530e-01
+204 -3.1459152727076800e-01 -7.2066080044499392e-01 -7.5277710665622377e-01
+306 8.0529112138128323e-01 5.8633344712571456e-01 -1.8256367575393391e+00
+414 -1.7678147014996259e+00 -7.1706970804552028e-01 6.0614094017430153e-01
+79 1.6303375369704216e+00 -9.6446702691566244e-01 1.7014099837302679e+00
+116 -1.7231587507489068e+00 -2.2374070614774770e+00 1.0442058418221465e+00
+231 -9.2546402309298947e-01 3.2157716287919585e-01 -6.2797312032694141e-01
+171 1.4243635122212095e+00 -3.4034411127116748e-01 2.1060050313379719e+00
+273 1.0777993506600285e+00 -1.7160435918146213e+00 1.0194335249558832e+00
+260 1.4544392536719453e+00 9.3765863648494707e-01 1.9642995061847701e+00
+43 -1.1326443879098980e+00 1.4846909715454837e-01 1.6874301063514463e+00
+208 -1.1234489793683757e+00 -1.0408587064067447e-01 -4.8386417041968832e-01
+226 6.0458834484750512e-01 -4.5998597100022531e-01 -4.1420588246526679e-01
+370 -2.5264443636161833e-02 2.2888129565845829e-02 -7.5123334633027034e-02
+85 8.9124431006375637e-01 1.2150234901900336e+00 -1.3794080039677503e+00
+213 6.8750004752856542e-01 1.7643443473184617e-01 -7.6039731517372366e-01
+299 1.1166507011383160e+00 -6.9342398354435130e-03 4.0031150060326554e-01
+249 1.6676259701674601e+00 -1.2916011404415280e+00 8.3081388251616040e-02
+258 -3.3338543070706111e-01 -5.9677208925291603e-01 1.5474505568531705e-01
+388 3.3338877926510010e-02 3.6534261356710357e-01 -9.3163626937108668e-01
+91 -1.0436539497197659e+00 -3.6206368573074071e-01 -3.9406149981573330e-01
+415 1.8022253921012549e-01 -2.5361248238828202e+00 7.8761564433010600e-01
+27 1.6681493439613866e+00 -6.3832374162264238e-01 5.5825898158707521e-01
+465 -3.3883292906124979e-01 -1.9199928938047977e+00 -1.8916637827626086e-02
+72 -7.0256791239588934e-01 -1.3631374858756479e-01 1.1539466869530803e+00
+58 -9.7431680335045678e-02 -1.0635351953490649e-01 -4.5168665276313896e-01
+88 5.2937320884954953e-01 -1.1749454649084838e+00 1.2744645330784524e-01
+6 -1.1593774469007843e+00 5.1413980407296589e-01 1.2944389470552302e+00
+75 5.7374690805841033e-01 7.8433587872715163e-02 -3.9368611836917572e-01
+22 -9.6996881290452630e-01 1.2965746977075312e+00 -1.5394575986278033e-01
+128 -6.4070276272082638e-01 2.2708380115440285e-01 -1.6142126045669323e+00
+193 5.7443781554989515e-01 -1.2458447329459237e+00 -9.3796775534017041e-01
+389 6.7276417854677451e-01 -2.9571653479618748e-01 -2.4423872850138906e-01
+241 -1.4102001294916395e-02 -9.9059498112109945e-01 -8.4827835518272399e-01
+14 1.0515231184651945e+00 4.5617222115757200e-01 -5.2911246318135385e-01
+307 5.0028759749137040e-03 -5.0699437336696396e-01 -2.5790615969461578e-01
+401 3.9207473498457289e-02 -9.2058641117610335e-02 -1.3689269955592848e-01
+248 1.5980225324156891e-01 -4.1600725109692227e-01 4.1253240562945254e-01
+335 5.6526042010051392e-01 -1.0563866873458263e+00 2.2963431232103770e-01
+281 4.7335967523683825e-01 -9.8764893385752195e-01 1.3275577965292436e+00
+33 9.5713349527393091e-02 -6.7578402785937589e-02 1.2308478723868863e+00
+221 -1.2080117435798665e+00 -2.2142477880517361e-01 -7.4060132361069042e-01
+251 5.1352069062594478e-02 -4.7173153686079722e-01 1.3645828375870801e+00
+138 -9.8931419992230929e-01 -5.6403083699472267e-01 4.8437720371021081e-01
+73 -8.7238230525929461e-01 2.2619668651197614e-01 -3.2904255863286136e-01
+237 1.5966476021876643e+00 -1.0734521639966819e+00 -5.7008225474518526e-01
+77 -6.5731386120913948e-01 -1.3672574981662913e-01 3.4397972232069346e-01
+339 6.8164227902965080e-01 -8.7654828372118265e-01 -9.2289632523091991e-01
+69 -1.0710495228647499e+00 -2.2798359247984892e-01 -1.4017813863798740e+00
+150 1.4728554201464288e+00 1.6873571351133543e-01 -4.9418642333439719e-02
+219 3.5347912349947574e-01 1.9808724875609746e-01 -5.0625815831733678e-01
+9 6.3426493841071574e-01 2.5658018877632760e-01 4.7953937763858034e-01
+3 -1.2144659411870629e+00 8.8297859772359033e-01 -2.6732917639533704e-01
+93 3.3822789154655764e-01 8.1143107440094409e-01 -2.7263548606070248e-01
+68 1.6351351046383883e-01 5.8445604848527588e-01 1.2062643207337951e+00
+35 1.6550671586466750e-02 1.6790869037398232e-01 2.5947900830558479e-02
+174 1.4403688502275100e+00 1.4476094634256031e+00 -1.0038411182890872e+00
+369 -7.9303772394565375e-01 -1.1303432398125628e+00 -1.5457440975201582e+00
+469 -5.5239153419630294e-01 -4.7075634498045837e-01 5.0539291079888571e-01
+270 5.0351525849727374e-01 1.3716919044917866e+00 2.8157430421583601e-01
+196 -2.2101193763774998e-01 9.4257812877032954e-01 -1.2881024224381614e+00
+235 -3.7410168233289759e-01 1.9712488864630789e-01 4.3505985658493368e-01
+104 1.0770774810232915e+00 -7.7420116923795979e-01 6.4885799952777634e-01
+363 6.7513909705285891e-01 2.0186757605257277e-01 6.2260156794788402e-01
+440 -2.7563401138205812e-01 4.0196589304635166e-01 6.8696509786942431e-02
+271 3.5486339796858996e-01 -1.0482634846478769e+00 1.0693870635001128e+00
+385 2.0653637543135839e-01 1.6251902525248125e+00 -1.0240800582063148e+00
+367 1.2090890730479817e+00 7.7304993739281713e-01 -9.1196492155951403e-01
+8 1.5650406952874099e-01 1.4761853226272020e-01 7.1667065222651671e-01
+445 -8.1712119890107360e-01 5.0169175756271378e-01 6.3236875667481124e-02
+95 7.1388432342661989e-01 -2.6058252368101411e-02 -1.0159428151424379e+00
+227 -2.1897046900910261e-01 2.4492515107534649e-01 2.2679621024062299e-01
+45 5.0120245022687604e-01 -1.1380921868404339e+00 7.8232214243213327e-01
+62 5.7973338245234629e-01 -7.9117819757367913e-01 -5.4327640325878901e-01
+309 5.3715909018265540e-01 -1.7844975395461696e-01 -1.5521878040430610e+00
+70 -1.3649193486754410e-03 -1.5242800010205164e+00 1.6225001176581824e+00
+285 -1.5639729724408871e+00 1.4626366106935738e+00 -1.2286649953661322e+00
+387 -1.3226131489884976e+00 -1.0695489941103238e+00 -1.4911056977700106e+00
+484 -1.5087954881419843e+00 -2.3407968100714568e-01 8.9471716161806181e-01
+98 3.0624993702155973e-01 -1.5934008419723134e-01 -1.0700290072035785e+00
+282 -1.1678279208732874e+00 8.8816716100523774e-01 -3.7594379985472404e-01
+203 -1.2339287833422024e+00 -1.0851718286318821e+00 -3.5581216293468138e-02
+189 -2.5647410293058537e-01 -3.6970804452537526e-01 -2.7009745024499721e-01
+267 -2.3687566231483211e-02 5.4572876490146272e-01 1.2664988400691950e+00
+155 1.7966821085178111e-01 -1.4254017034472058e-01 -1.0614594824352175e+00
+313 -4.0833555690731410e-02 -4.8193546027639250e-01 -4.9602679807507655e-02
+250 4.3395781720710452e-01 9.1543705256636054e-01 -6.1780594432179192e-01
+49 -5.8496932778818223e-01 2.0845066341835794e-01 -4.5660091846264866e-01
+111 -4.5577164923909808e-01 6.7687347516731056e-01 1.6901306317755600e+00
+124 2.3017035971304120e-01 1.2116381200880159e-02 -1.0715278739263430e+00
+488 1.1047027896127255e+00 -1.2726762145955062e+00 7.7913777050672761e-01
+48 -4.0669146183799126e-01 -1.0181831306551499e+00 6.3005463620771884e-01
+123 -1.2387342693749222e-01 -4.5460712390743935e-01 -5.6241811130968811e-01
+423 1.5051226831159246e+00 -1.0959205280256621e+00 3.5512567969767117e-01
+329 1.5695087401093555e-01 -4.0810545929224507e-01 1.5430948869435961e+00
+334 -1.0908322951892016e+00 -5.4577269465148259e-01 -6.7521769754195848e-01
+146 4.4361993166914260e-01 4.1962079818012665e-01 -3.9049661615798353e-01
+500 -3.0319085884539942e-01 7.6039240701443311e-01 2.0836797720615632e+00
+46 -3.9431430053902639e-01 3.7259615958614579e-02 -1.6080049457594121e+00
+374 9.2250275913940816e-01 -5.4107808677407720e-01 6.5542912197165987e-02
+52 7.3216124676172201e-01 9.4572980131871298e-01 -1.2696251833719743e+00
+466 1.5044833940669236e+00 -1.8483350803538756e+00 9.5057050040256286e-01
+272 -4.0011978947412463e-01 2.0429884556820119e-01 5.4991808303431688e-01
+331 5.7814259377466726e-01 1.7044411336854475e+00 -7.2986308719737536e-01
+12 6.8240059668147224e-01 8.7756006768431916e-01 -1.7546792723746987e+00
+178 -1.1529369201979585e+00 -3.9576843637993819e-01 4.1654001054549866e-01
+376 -1.3678465511836193e+00 -1.5404423718061540e+00 -8.9187926122069050e-01
+117 -1.3804354656475164e+00 -7.9075263667417661e-01 -3.6776627245391275e-01
+419 1.9706999335859632e-01 -1.1815264434314889e+00 1.6458873301525292e-01
+379 2.6784097653047051e-01 3.8806722276897926e-01 4.0603233211791162e-01
+53 1.0521966172519035e+00 5.1164934479578683e-01 9.8577725800730365e-01
+28 1.0129347774638686e+00 1.1058165761432928e+00 -3.9034581257989903e-01
+300 -1.4876024382598509e+00 -6.2467982655378396e-01 1.3535210035045362e+00
+83 9.7907832581926901e-01 1.4855763621624507e+00 4.1007013403839504e-01
+304 -1.8963384707907098e+00 5.2731582964002444e-01 1.0962854464287259e+00
+51 8.5928138726119452e-01 -1.6162178311396377e-01 3.2746315986045632e-01
+180 -3.4136546641226267e-01 1.2771151944969144e+00 -3.9578085701206223e-01
+60 -1.7295103304379962e+00 -9.9492157200179920e-01 1.2224650147307308e+00
+255 5.0740556300071860e-01 -6.6892801585963790e-01 -4.4506165343889975e-01
+336 3.8821455708543523e-01 7.4853146108739210e-01 1.0393955338521590e+00
+74 -8.5185557539809709e-01 5.0099005012093123e-01 6.1187012870886726e-01
+11 -4.8598509694234203e-02 -9.7638648911006976e-01 3.6444261502684167e-01
+106 -8.3362253193144206e-02 -9.2846276284357510e-01 -2.0625632391587234e-01
+342 -1.1850581603543699e+00 -4.6587013939418070e-01 -2.5534356631266192e-01
+366 1.8044603030107276e-01 1.2857436498573065e+00 -4.9002876775305132e-01
+153 5.9355527141792742e-02 8.5309993046714749e-01 1.2601655126485285e+00
+384 -3.3199740732000493e-02 -1.1586262754841152e+00 2.7022536481319620e-01
+444 1.4366137360637837e-01 7.4777392856310121e-01 -1.3874143079892032e-01
+169 -2.2106828869911426e-02 4.3869453276997300e-01 -8.6050040149769946e-01
+312 -9.5175409360636343e-01 -7.9143766981298624e-01 -8.3109524929616485e-01
+131 -1.2661930440833327e-01 4.9560114120296378e-01 1.2481373975929652e-01
+290 4.7988720115251880e-03 4.3307801179133432e-01 7.2537453656012302e-02
+240 -1.4417217340859769e+00 1.4357683004122099e+00 1.4379544254488796e-01
+403 9.8034468727423985e-01 -2.0429774540476020e-01 -1.4454160823908524e-01
+438 1.1944593880989134e+00 2.3483551676455661e+00 1.9984542691081373e+00
+420 1.3486549938776343e-01 -1.0783938772052266e+00 2.4823645088293936e-01
+175 1.6188020186225376e+00 -1.2786027961604105e+00 1.5779664705358573e-01
+305 7.6304434006850930e-01 -1.0609104653684724e+00 -1.3992599279449225e+00
+341 -5.0986549858189378e-01 9.7345649143358179e-02 -5.4583627317992767e-01
+291 1.4937985447982780e+00 1.1345276617898410e-01 1.4735185581275405e+00
+372 2.7438262099508215e-01 2.3642499432513167e-01 2.0103126185436837e-01
+293 -2.0812456222423235e+00 1.2585017395884830e+00 -1.9872753884412903e-01
+461 -1.7366155414350635e+00 6.7563297134023181e-01 -3.0241597324899727e-01
+263 -9.9912758612453856e-01 -1.4606803538340738e+00 -4.8347668759554979e-01
+473 1.5452042518079840e+00 1.3194414166855677e+00 -1.4200655529990689e+00
+166 -1.4143951087621034e-01 -1.1527398628151918e-01 -2.0324403247706951e-01
+132 1.6628716981291343e+00 -2.6751309910688231e-02 -1.1291183234894089e+00
+32 -2.3270952903472897e-01 2.1018074582476372e+00 1.1203402946623044e+00
+475 -1.2604994295453433e+00 -9.5339439518226154e-01 3.3662881048301792e-02
+283 1.2749816341052314e+00 4.7341406662300863e-02 8.5345288522412766e-01
+205 9.6825352209774937e-02 -1.0389613158303228e+00 -5.4505460161674724e-01
+158 8.8743698904591217e-01 1.0689060843560156e+00 6.1104406355038210e-01
+1 5.8273461256327774e-01 4.6658417699512433e-01 7.3964607391058224e-01
+485 4.4170048227791719e-01 -2.8581555293721217e-01 -1.0141724005249320e+00
+378 6.6654429040499424e-02 -1.8224880226392848e+00 1.2128685020610477e+00
+71 -9.1996605782626056e-02 -2.2291664206826806e-01 -1.2402752353304469e+00
+320 -9.5597854323421563e-02 -8.6852053155856812e-01 3.5066557017928385e-01
+481 7.9532304801009490e-01 -9.4180364698072971e-01 1.2495155405965916e-02
+318 -4.1450336396091869e-01 3.4120074122917260e-01 7.3720013366972947e-01
+496 -6.2393347120986375e-01 1.0494054390993381e+00 -1.0590359239552802e+00
+319 -9.5320962685676391e-01 9.2021659866276262e-01 5.5763202369068265e-01
+338 9.5180850251473562e-02 7.4581412234581979e-02 1.6562396473566801e+00
+358 -1.2039319191565336e+00 9.9635011716776112e-01 3.7776599320072252e-01
+225 -2.2878377680357529e-02 5.9393376646738183e-01 -1.7855655501046659e+00
+55 -1.2658412487783444e+00 8.3827650617277571e-01 3.7229167676011687e-01
+362 -1.1190412417889151e+00 -4.1093870066191052e-01 -5.5943417408569930e-02
+36 -1.1944064170110538e+00 5.2834231151049005e-01 -2.5661926163189262e-02
+133 -3.4046853819783990e-01 8.7037698795660379e-01 2.6480748394474063e-01
+412 -8.2433971673189854e-01 -2.4364204997081215e-01 3.2182546651866395e-01
+89 1.2836300411162869e+00 -1.0534639266932924e+00 1.7273414295686953e+00
+38 -5.4018176537409834e-01 1.1189891674084445e+00 -5.4949279328730405e-01
+337 2.0680174457636453e+00 6.8756290663232345e-01 -5.3885707707990127e-01
+233 -1.1580426595635346e-01 1.5352867360847791e+00 1.8920809100075830e-02
+442 -5.2273629379374242e-01 -4.7573534425197805e-01 3.3349171376798270e-01
+143 -6.4456717536444386e-01 -8.2221860127843815e-01 -3.1619033462829965e-01
+194 4.6000888686458724e-01 2.4868203858229221e-01 -1.0844456124091726e+00
+308 7.0412401782623568e-01 1.4705120558853640e-01 -1.6984333842169479e+00
+286 -7.4825969157579242e-01 1.7277684761952672e+00 -4.8658995664543186e-01
+61 -1.4087152482721139e+00 -1.9786675343505181e-01 -8.3049863139435698e-01
+426 -5.9317510584759559e-01 -3.1176204329742879e-01 9.7102624547089234e-01
+450 2.9660411649227686e-01 -1.0106432265344207e+00 -8.0608626908093450e-01
+188 -4.5857896465809017e-01 8.3781782180488956e-02 1.6679261825519407e+00
+94 -2.5105865768429703e-01 -1.0277678550758340e+00 2.0110948522283595e-01
+252 -2.6250057461969262e-01 -8.3779492646801812e-01 -1.2443616941843418e+00
+321 -1.3849227082743394e+00 -7.9961814140752485e-01 -5.2758719721869440e-01
+156 9.7248559724229044e-01 1.4856613149426096e-01 -1.0762319296143268e+00
+167 4.8758255885458768e-01 3.7938735168685944e-01 -8.2720329905212353e-01
+330 4.8147102008722625e-01 -2.9945144880586022e-01 -7.3225876236181275e-01
+195 1.6483881807209144e+00 -1.9598861592898009e-01 -6.2914422221089158e-02
+78 2.3726384124165131e-01 2.7514362192855252e-01 1.7571172900747967e-01
+157 -2.1865090243933563e+00 3.7632441115528098e-01 7.7333806476534650e-01
+373 9.8884980347773899e-01 3.4121263358683873e-01 -3.3626353210018556e-01
+56 -5.1874451216553419e-01 5.0889073330236878e-01 -7.4130187216438104e-01
+315 1.5052362221906064e+00 -6.9840962964633502e-01 -1.5488907100876634e-01
+478 1.1082301092586995e+00 2.9770971389874379e-01 -5.4580262091212584e-01
+109 9.4396405787734605e-01 4.9318782477478235e-01 1.2284569230572757e+00
+357 -1.2371693160029509e+00 4.1376978558912775e-01 3.3529522630256764e-01
+454 -4.4342104600414733e-01 -3.5633346291025164e-01 -2.1131375082322554e-01
+86 -2.6274074185542590e-01 -2.7912975828127135e-01 -3.4564526880694488e-02
+262 -2.0457328043953218e-01 -1.0549970010682790e+00 -8.7569945910877856e-02
+160 1.2508565962389259e-01 -1.1049367732134203e+00 -4.1414673901133031e-01
+168 -5.4833844765552298e-01 9.1122623641564449e-01 -8.1466354261018392e-01
+145 1.9424847461468013e+00 2.4717259671440775e-01 -8.1993106446423478e-02
+400 -7.3927429735196926e-01 9.6278828542903949e-01 9.8355595468926471e-01
+395 -1.7616642510587566e+00 -4.3839255400156163e-01 -4.3538389834393521e-01
+297 -8.1651466743971568e-01 1.9480772762641770e-02 -2.5090574319554337e-01
+207 9.9594630394692474e-01 -5.5333584932915503e-01 -8.1672262386449701e-02
+460 3.8863838706865678e-01 -1.9332061145329088e-01 7.5599739666291188e-01
+44 4.9322334356812642e-01 5.4097595609480897e-01 1.2096517945019982e+00
+377 -2.3588544095100208e-02 4.2843373407529362e-01 -8.1372583759538197e-01
+411 -7.0187880935114388e-01 -8.8403807864596529e-01 3.6139298123719504e-01
+396 -6.1324863673452068e-01 -5.8579142070009704e-01 -1.6980414017487730e+00
+424 -1.4405372913769463e+00 -1.2161267905238037e-01 -7.1275775203655271e-03
+66 -2.2455608800065441e-01 -5.5194985983545303e-02 -2.9031971555487707e-01
+371 2.8693892522161785e-01 -1.3942754264578394e-01 4.1638095135795372e-01
+186 -2.5542578648985415e-01 -2.7435394726950046e-01 1.6931522427410683e+00
+413 5.6007135922818607e-01 1.2189064363493711e-01 -7.5271223366515216e-01
diff --git a/examples/USER/uef/npt_biaxial/in.npt_biaxial b/examples/USER/uef/npt_biaxial/in.npt_biaxial
new file mode 100644
index 0000000000000000000000000000000000000000..152054fce652b64b6052e30ec2b73ab7e3f92278
--- /dev/null
+++ b/examples/USER/uef/npt_biaxial/in.npt_biaxial
@@ -0,0 +1,31 @@
+# biaxial NPT deformation of WCA fluid
+
+units		lj
+atom_style	atomic
+
+
+pair_style	lj/cut 1.122562
+read_data       data.wca
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+
+change_box      all triclinic
+
+# these commands show the different methods that may be used to impose
+# a constant stress through isotropic or anisotropic coupling
+fix		1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z
+#fix		1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5  erate 0.5 0.5 ext xyz
+
+fix             2 all momentum 100 linear 1 1 1
+
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
diff --git a/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1 b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..e6d3b30cdbae469ff63f809c6025a44ad10e4bf0
--- /dev/null
+++ b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.1
@@ -0,0 +1,284 @@
+LAMMPS (22 Sep 2017)
+# biaxial NPT deformation of WCA fluid
+
+units		lj
+atom_style	atomic
+
+
+pair_style	lj/cut 1.122562
+read_data       data.wca
+  orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  500 atoms
+  reading velocities ...
+  500 velocities
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+
+change_box      all triclinic
+  triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0)
+
+# these commads show the different methods that may be used to impose
+# a constant stress through isotropic or anisotropic coupling
+fix		1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z
+#fix		1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5  erate 0.5 0.5 ext xyz
+
+fix             2 all momentum 100 linear 1 1 1
+
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.62256
+  ghost atom cutoff = 1.62256
+  binsize = 0.811281, bins = 11 11 11
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/newton/tri
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.742 | 2.742 | 2.742 Mbytes
+Step c_1_press[1] c_1_press[2] c_1_press[3] 
+       0    6.3937851    7.0436438    6.4461087 
+      50    7.9020345     7.303289    14.733929 
+     100    8.3214325     8.385843    13.166247 
+     150    5.7168419    5.6083988    9.7224198 
+     200    3.8875348    4.0840705    7.9912973 
+     250    4.2933724    4.2410519    7.7200953 
+     300    4.5452314    4.2434949    8.6709832 
+     350     5.446489    5.6166962    11.300536 
+     400    6.0682558    5.7787878    12.106852 
+     450    5.3884296    5.5066688    9.9100012 
+     500    4.8046418    4.7115986    9.9769201 
+     550    4.9043933    4.6240966     9.319336 
+     600    4.6843238    4.9571246    9.5325487 
+     650    5.1952989     5.195648    9.6187047 
+     700    5.4163364    5.2938289    10.136655 
+     750    5.6723178    5.0670261    11.415221 
+     800    5.9379901    5.6402916    10.815209 
+     850    5.0695389    5.3021432    10.742859 
+     900    4.6498962    4.7111912    9.8453523 
+     950    4.4811641     5.170132     9.421765 
+    1000    4.7501483    4.2420772    9.3510803 
+    1050    4.5973379    5.2046799    9.8632975 
+    1100    4.7879562    4.9051316    9.8658626 
+    1150    5.0528771    5.5048545    10.110913 
+    1200    4.9926563    5.2482081    9.9610863 
+    1250    4.8476232    4.8498859    9.5752989 
+    1300    5.2932038    5.0594534    10.869545 
+    1350    5.4720421    5.0830442     10.69386 
+    1400    4.8175566    4.9863651    10.041898 
+    1450    4.6307145    4.5615459     9.231299 
+    1500    5.0296259    4.5433558    8.7180513 
+    1550    4.8708444    5.1601014    10.256791 
+    1600    5.5098593     5.316773    10.224386 
+    1650    5.5055907    5.0516814    10.272037 
+    1700    4.6965552    5.2436628    9.8272905 
+    1750    5.0212307     4.740601    8.9738802 
+    1800    4.7756351     5.199734    9.9929449 
+    1850    4.7107092     5.177203    10.580427 
+    1900    5.0935608    4.5286844    9.3036832 
+    1950    4.8035855     4.894359    9.4423987 
+    2000    4.5643937    4.7480477    9.4384251 
+    2050    4.9314701     4.990414    10.151815 
+    2100    5.9828824    5.8188026    12.262691 
+    2150    5.5337303    5.4598468    10.136112 
+    2200     4.892172    4.8699674    9.1629905 
+    2250    4.5680591    4.5740533    9.5633545 
+    2300    5.0023535    4.3948659    8.9645774 
+    2350    5.2533056    4.9803884    10.255653 
+    2400    5.3330196    5.3888322    10.021617 
+    2450    5.2095527    4.8765336    10.135381 
+    2500     5.221153    5.2974568      10.2493 
+    2550     5.385331    5.0801192    10.490479 
+    2600    5.3274988    5.0253548    10.147587 
+    2650     4.718677    5.2710337    9.7549521 
+    2700    4.5811521    4.6083971    8.1923164 
+    2750    4.4743752    4.3319737    8.8690805 
+    2800    5.0215013    5.2262961    9.8627954 
+    2850    6.1005107    5.5994942    11.170661 
+    2900    5.9673524     5.362124    10.401699 
+    2950    5.5400849    5.7523993    10.292536 
+    3000    4.9202636    5.1210431    10.125839 
+    3050    4.5286487    5.3586164    9.8634322 
+    3100    4.5719961    5.1615414    9.8027972 
+    3150    5.4091919    4.8581943      9.96938 
+    3200     5.522125    5.3564838    9.7638407 
+    3250    4.9019062    5.2514758    9.2993079 
+    3300    5.2375918    5.1439012    9.4313575 
+    3350     4.750235    4.8692016     10.54193 
+    3400    5.5793211    5.4184157    11.021389 
+    3450    4.9022614    5.3166498    9.4629659 
+    3500    4.6348617    4.4650979    9.0665548 
+    3550    4.7922405    4.8961269    10.255446 
+    3600    4.8914457    5.1158894    9.4736084 
+    3650     5.062771    4.6725475    10.263484 
+    3700    5.4842823    5.7793971    10.342915 
+    3750    5.3136012     5.063065    10.398307 
+    3800    4.9372149    4.9270414    9.5304748 
+    3850    5.2144752    5.1716455    9.7575725 
+    3900    5.0892665    5.1697057     9.918052 
+    3950    5.1124507     5.354702     9.791366 
+    4000    5.1255084    5.1143653    10.913101 
+    4050    5.1891698    4.9913681    9.6871728 
+    4100    4.7663368    4.2435014    8.3815668 
+    4150    4.8060033    4.3415868    9.6553386 
+    4200    4.8548303    4.8006768    9.5995801 
+    4250    5.0976366    5.2683175    10.386444 
+    4300    5.8921937    5.5134696    10.788143 
+    4350    5.8323871    5.5255869    11.199128 
+    4400    5.2464655    5.0005905    10.311055 
+    4450    4.9264849    5.2499854     10.26702 
+    4500    4.4431895     4.536981    8.7489096 
+    4550    4.5180914    4.2080277    8.6525529 
+    4600    5.1782188    5.1224059    10.683341 
+    4650    5.4156233    4.8714464    10.473939 
+    4700    5.3107669     5.224614    10.569391 
+    4750    4.9538022    5.2509475    10.288918 
+    4800    4.6976945    4.8107142    9.8299772 
+    4850    5.1227936    5.0737571    10.440452 
+    4900    4.7580514    4.6375995    9.1971008 
+    4950    5.0647601    4.6470735     9.583131 
+    5000     5.196231    5.7531491    10.409807 
+    5050    5.6691323    5.7163652    12.335701 
+    5100    5.3603738    5.4887106    10.961712 
+    5150     4.455028    4.6494465    9.8096968 
+    5200    4.7596912    4.4804896    9.3762885 
+    5250    5.3144927    5.0113772     9.553101 
+    5300    5.3445266    4.8262035    9.1220802 
+    5350    5.1540657    5.5982676    10.765178 
+    5400    5.1773418    5.2684381    10.452351 
+    5450    4.8946859    5.3283747    9.8015564 
+    5500    5.2009608    4.7183522    9.4558009 
+    5550    5.4158589    5.5005458    10.539505 
+    5600    4.7196831    5.4181991    9.6439249 
+    5650    4.8333571    4.8601728    8.9350189 
+    5700    5.4395698    4.9730096    10.669681 
+    5750    5.2947443    5.6973259    10.020539 
+    5800    5.4391993    5.5255143    10.264969 
+    5850    4.9921388    5.2643827    10.217028 
+    5900    5.0048643    4.7952641    8.9718929 
+    5950    5.1843818    4.5987295    9.6858944 
+    6000    5.0343993     4.946933    9.7436708 
+    6050    4.6202714    5.3502658    10.752915 
+    6100    5.6914422    5.3621964    10.281827 
+    6150    5.1928763    5.9652686    10.923881 
+    6200    5.0030409    5.2013891    10.056308 
+    6250    4.9699876    5.2363753    9.9964211 
+    6300    4.9129606    4.4558458    9.0419952 
+    6350    4.6623958    4.4078264     8.528649 
+    6400    4.9811441    5.1183207    10.261751 
+    6450    5.3644017    5.5153937    10.401295 
+    6500    5.6674981    5.7427566    11.928777 
+    6550    5.1622364    5.3212928    10.067198 
+    6600    4.5954278    5.1645397     10.16724 
+    6650    4.9192712    5.0413326      9.95656 
+    6700    4.6179845    4.5656214    9.3798952 
+    6750    4.7287495    4.5071148    8.7890116 
+    6800    4.8600442    4.8083512    10.245595 
+    6850    5.0515531    5.1609272    10.553855 
+    6900    5.1159742    5.1359869    10.594699 
+    6950    4.8908884    5.0592418    9.5698704 
+    7000    4.7654136    4.7530776    8.9439321 
+    7050     4.779293    4.7534957    9.7244349 
+    7100    5.2265831    5.6869073     10.32717 
+    7150    5.4019177      5.15174    10.457567 
+    7200    4.9817102    5.0596098    10.337574 
+    7250    5.1836654    5.6065238    10.723108 
+    7300    4.2916569     4.457143    8.5419099 
+    7350    4.3906104    4.5439294    9.0805415 
+    7400     4.998572    5.3386063    10.491418 
+    7450    5.1109022    5.0506801    10.636116 
+    7500    5.0248381     5.019932    10.217023 
+    7550    5.0109265    5.1438717    9.9032426 
+    7600    4.6628614    4.6204146    8.9459669 
+    7650    4.8930717    5.0650009    10.049331 
+    7700    4.9373454    5.6265835    10.210644 
+    7750    5.5001067    5.3133253    10.667995 
+    7800    5.0816102    5.0125753    10.591986 
+    7850    4.9638046    5.1969015    9.9728333 
+    7900    4.8438207    4.9217213    8.9978809 
+    7950    4.7318805    4.6248537    8.6806596 
+    8000    5.2808543    5.2892613    10.932535 
+    8050    5.9609722      5.87087     10.47602 
+    8100    5.2190231    5.6693244    11.244536 
+    8150    5.3481127    5.2849903    10.222845 
+    8200    4.7833053    4.7404657    9.2034474 
+    8250    4.5461994     4.510467    10.294452 
+    8300    4.6025175    4.8332817    8.7967546 
+    8350    5.0389897    5.6642908    10.243402 
+    8400    4.8414338    4.8925143    9.3653631 
+    8450    5.5087429    4.7830361    10.831666 
+    8500    5.2678146    5.1697789    9.9105782 
+    8550    5.1211843    4.9097801    9.4165956 
+    8600    5.8239149    5.0821022    10.803261 
+    8650    5.3620154    5.5831747     11.16202 
+    8700    5.1625813    4.8791404    10.537681 
+    8750    4.5622461    5.0157549    10.013227 
+    8800    4.4051517    5.0224553    9.6364273 
+    8850    4.1711629     4.635617    8.5470244 
+    8900    4.7049907    5.2458435    10.100728 
+    8950    4.8568883    5.2360772    9.2306469 
+    9000    5.0091899    5.2203574    10.718541 
+    9050    5.1037824    4.9022451     10.24271 
+    9100    5.0789015    4.9331454     9.173614 
+    9150    5.3865455    5.3427553     11.40199 
+    9200    5.5089482    5.9423232    10.976063 
+    9250    5.1353552    5.0650262    10.040607 
+    9300    4.6761948    4.9155175    9.6413722 
+    9350    4.4780834    4.3934708    8.7049819 
+    9400    4.2561799    4.7906324     9.046134 
+    9450    5.6162819    5.2881846    9.9040868 
+    9500    5.7554547    5.6111262     10.23849 
+    9550    5.4230462    5.5656045    10.908006 
+    9600    5.5045685    4.9818892    9.8929535 
+    9650    5.0541481    5.0183351    9.5226021 
+    9700    4.9712829    5.2395398    9.9996693 
+    9750    5.0960017    5.4419775    10.914719 
+    9800    5.0790688    5.6378474     10.00789 
+    9850    4.9661747     5.114502    9.4585052 
+    9900    5.0133498    4.7456254    9.4572653 
+    9950    5.3318846    4.6643122    10.096292 
+   10000    5.2227687    4.8924305    9.5894615 
+Loop time of 4.78247 on 1 procs for 10000 steps with 500 atoms
+
+Performance: 903298.340 tau/day, 2090.968 timesteps/s
+99.2% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.73894    | 0.73894    | 0.73894    |   0.0 | 15.45
+Neigh   | 2.9092     | 2.9092     | 2.9092     |   0.0 | 60.83
+Comm    | 0.32306    | 0.32306    | 0.32306    |   0.0 |  6.76
+Output  | 0.003392   | 0.003392   | 0.003392   |   0.0 |  0.07
+Modify  | 0.6959     | 0.6959     | 0.6959     |   0.0 | 14.55
+Other   |            | 0.112      |            |       |  2.34
+
+Nlocal:    500 ave 500 max 500 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    905 ave 905 max 905 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    3340 ave 3340 max 3340 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 3340
+Ave neighs/atom = 6.68
+Neighbor list builds = 4580
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:04
diff --git a/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4 b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..9be4413a9690e82a497ff4f9f453b1246d5f7a0f
--- /dev/null
+++ b/examples/USER/uef/npt_biaxial/log.22Sep17.npt_biaxial.g++.4
@@ -0,0 +1,284 @@
+LAMMPS (22 Sep 2017)
+# biaxial NPT deformation of WCA fluid
+
+units		lj
+atom_style	atomic
+
+
+pair_style	lj/cut 1.122562
+read_data       data.wca
+  orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  500 atoms
+  reading velocities ...
+  500 velocities
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+
+change_box      all triclinic
+  triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0)
+
+# these commads show the different methods that may be used to impose
+# a constant stress through isotropic or anisotropic coupling
+fix		1 all npt/uef temp 0.722 0.722 0.5 iso 10 10 5 erate 0.5 0.5 ext z
+#fix		1 all npt/uef temp 0.722 0.722 0.5 z 10 10 5  erate 0.5 0.5 ext xyz
+
+fix             2 all momentum 100 linear 1 1 1
+
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.62256
+  ghost atom cutoff = 1.62256
+  binsize = 0.811281, bins = 11 11 11
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/newton/tri
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.71 | 2.71 | 2.71 Mbytes
+Step c_1_press[1] c_1_press[2] c_1_press[3] 
+       0    6.3937851    7.0436438    6.4461087 
+      50    7.9020345     7.303289    14.733929 
+     100    8.3214325     8.385843    13.166247 
+     150    5.7168419    5.6083988    9.7224198 
+     200    3.8875348    4.0840705    7.9912973 
+     250    4.2933724    4.2410519    7.7200953 
+     300    4.5452314    4.2434949    8.6709832 
+     350     5.446489    5.6166962    11.300536 
+     400    6.0682558    5.7787878    12.106852 
+     450    5.3884296    5.5066688    9.9100012 
+     500    4.8046418    4.7115986    9.9769201 
+     550    4.9043933    4.6240966     9.319336 
+     600    4.6843238    4.9571246    9.5325487 
+     650    5.1952989     5.195648    9.6187047 
+     700    5.4163364    5.2938289    10.136655 
+     750    5.6723178    5.0670261    11.415221 
+     800    5.9379901    5.6402916    10.815209 
+     850    5.0695389    5.3021432    10.742859 
+     900    4.6498961    4.7111912    9.8453524 
+     950    4.4811637    5.1701321    9.4217645 
+    1000     4.750149    4.2420768    9.3510788 
+    1050    4.5973376    5.2046787    9.8633025 
+    1100    4.7879517    4.9051339    9.8658578 
+    1150    5.0528775    5.5048671    10.110905 
+    1200    4.9926841    5.2482049    9.9610519 
+    1250    4.8475836    4.8499116    9.5753651 
+    1300    5.2930219    5.0593566    10.869335 
+    1350    5.4722342    5.0830411    10.693439 
+    1400    4.8165803    4.9851498     10.04213 
+    1450    4.6276458    4.5642988    9.2306141 
+    1500    5.0196773    4.5470773    8.7204145 
+    1550     4.878246    5.1583406    10.263895 
+    1600    5.4921049    5.3126759    10.274755 
+    1650    5.4363266    5.1708866    9.9880665 
+    1700    4.9186005    5.2841155    9.5911223 
+    1750    4.9105668    4.7112031    8.9221295 
+    1800    4.9833291     4.886821    9.6573393 
+    1850    5.0729703    4.8331712    10.094971 
+    1900    5.7220173    5.9330299    10.580261 
+    1950    5.3176022    5.7140521     11.11604 
+    2000    5.3247727    5.5172893    10.622834 
+    2050    5.2055235    4.8768078    9.9819356 
+    2100    4.4604981    4.1427844    7.8106592 
+    2150    4.4406592    4.8264893    9.6662695 
+    2200    5.2350836    5.1039145     10.36006 
+    2250    5.3777857    5.3274609    11.357157 
+    2300    5.4888334    5.9555482     10.76346 
+    2350    4.6122564    4.7356468    9.0833059 
+    2400    4.6670237    4.4895588    9.0619648 
+    2450    4.2201177    4.2558397    9.4898835 
+    2500     5.452448    5.4336384     10.50224 
+    2550     5.012581     5.316158    10.324517 
+    2600    5.0880279    5.1264772    10.085103 
+    2650    4.8885834    5.2368982    9.6002032 
+    2700    5.1549266    5.3419678    11.335447 
+    2750     5.497105    5.3643445    9.9990393 
+    2800    4.8826744    4.9875712    10.125435 
+    2850    4.8617121    5.1282348    9.5629401 
+    2900    4.7883446    4.6187804    9.0562496 
+    2950    4.7656266    5.1293592    10.693811 
+    3000    5.2676745     5.110172    9.3512146 
+    3050    4.5749222    4.8413907    10.311305 
+    3100    5.0794819     5.265009    9.1598496 
+    3150    5.2078869    5.1879882    10.412548 
+    3200    5.0187616    4.6226213    9.7266174 
+    3250    5.1541897    4.5157063    9.8355764 
+    3300    5.0721396    5.3545282    10.174356 
+    3350    5.3984495    5.3222207    10.008886 
+    3400    5.1263226    5.1189192    10.361534 
+    3450    5.1251845    4.8312752    9.6546597 
+    3500    5.1133696    5.2646289    10.320765 
+    3550    4.9884235    5.3861707    9.1944042 
+    3600     5.196909     5.203186    10.085965 
+    3650    5.4717592    5.2205442    10.251283 
+    3700    5.4429771    5.3027898    11.385714 
+    3750    5.5688484    5.5980199    10.558193 
+    3800    4.5239453    4.7021545     8.952588 
+    3850    4.6438079    4.6409958    9.3890154 
+    3900    5.1108473    4.8787691    10.665694 
+    3950    5.6398426    5.4386578    10.668189 
+    4000     5.063697    4.9663173    10.513266 
+    4050    4.8770847    4.4603573    9.8101845 
+    4100    4.3950768    4.3579384    8.3402845 
+    4150    4.3355402    5.0429352    10.323111 
+    4200    4.7688478     5.051487    9.0632339 
+    4250    4.9879366    5.3367146    10.409554 
+    4300    5.4578199    5.4889206    10.418789 
+    4350    5.6598068    5.4538572    10.842349 
+    4400    5.3705312    5.3796871    10.430547 
+    4450     4.663804     5.058851    9.2705923 
+    4500    4.3439039    4.3523422    8.1747925 
+    4550    4.5414802    4.3750772    9.2702452 
+    4600    4.9216199    5.2897069    10.747727 
+    4650    5.5154852    5.9628437      10.5168 
+    4700      5.45199     5.382787    10.654544 
+    4750    4.7525419    5.4701385    9.3189378 
+    4800    5.3696365    4.6134207    9.4455676 
+    4850    5.2444123     5.035993    9.4148435 
+    4900    5.6006507    4.8536828    10.283579 
+    4950     5.155711     4.978634     10.58973 
+    5000    5.0854607    4.9853307    9.2414296 
+    5050    5.1098462    4.7349164    9.8739001 
+    5100    5.1989395    5.0217416    9.8780949 
+    5150     5.612116    5.2165007    10.338464 
+    5200    5.0571356    5.3109846    10.685262 
+    5250    5.4832657    5.0371665    8.9420853 
+    5300    4.5312549    4.9629392    8.2478064 
+    5350    5.1617038    5.0533699    10.452218 
+    5400    5.7873394    5.6776926    11.926526 
+    5450    5.7002516     5.243239    10.940265 
+    5500    4.7896799     4.898544    10.163856 
+    5550    4.9155627    4.9567495    9.4445476 
+    5600    4.2447343    4.5045912    8.7732992 
+    5650     5.070197    4.7343938    9.9908239 
+    5700    4.9609446    5.0901934    10.812786 
+    5750    5.4001631    5.5552888    10.085896 
+    5800    5.4209837    4.7153245    9.6865245 
+    5850    4.9801041     5.180338    9.8930439 
+    5900    5.3423595    5.2341361    10.294159 
+    5950     5.683047    5.6830131     10.24313 
+    6000    5.0618789    5.4533644    10.713412 
+    6050    5.4034888    4.6341621    10.031976 
+    6100    5.1934299    4.7525347    9.1287151 
+    6150    5.0092398     4.806931    10.024305 
+    6200    5.3046516    5.3083532    9.6396223 
+    6250    5.2824046    4.7957557    10.305279 
+    6300    5.3007029    5.0071874    11.175322 
+    6350    5.1128883     4.990408    9.3439118 
+    6400    5.0543602    4.9971378    9.8259954 
+    6450    4.8843692    4.9116343     10.08132 
+    6500    4.5966453    4.8042861     9.160272 
+    6550    4.8510961    4.7096646    9.8009968 
+    6600     5.658307    5.2330511    10.739519 
+    6650    5.2374409    5.3241249    10.291779 
+    6700    4.9006975    5.0036186    9.9872029 
+    6750    5.2209104    5.1826025    9.5671875 
+    6800    4.5238727    4.3070529    8.6072303 
+    6850    4.2617247    4.7551571    9.7302077 
+    6900    5.6499354    4.8714257    10.723511 
+    6950    5.6881769    5.1800721     10.18134 
+    7000     5.588834    5.0104896    10.304105 
+    7050    4.9404045    4.8589121    9.7096741 
+    7100    5.2208179    4.9339808    9.7737491 
+    7150    5.4507842     5.046485    10.734783 
+    7200    4.9737171    5.5760486    9.1627431 
+    7250    4.5967409     4.750471     9.315832 
+    7300    5.5147308    5.3202861    10.542679 
+    7350    5.7730418    5.5363574    10.384376 
+    7400    4.9879586    5.2837443    9.4485798 
+    7450    5.1862431    4.8357842    10.017598 
+    7500    5.4528245    5.1864957    10.941774 
+    7550    5.4202434    5.1089468    10.128264 
+    7600    4.8063537    4.8723653    9.0364984 
+    7650    4.3144701    4.6148377    9.4939315 
+    7700    4.9033831    5.5327473    9.9054613 
+    7750    5.0693093    5.0768222    10.473081 
+    7800    5.0591805    5.6009473    10.006225 
+    7850      5.15269     5.468248    10.404619 
+    7900    5.1971759    5.0615117    9.9614488 
+    7950    4.9771238     4.886213    9.4730722 
+    8000    4.7731123    5.1111433    9.9550597 
+    8050    5.1655183    5.4432364    9.6649669 
+    8100    5.4597006    5.4026039    11.631184 
+    8150    5.3229643    5.4394219    9.9830611 
+    8200    4.5420712    4.9359646    9.0121988 
+    8250    4.5455108    4.3883996    10.304568 
+    8300     5.219721    5.1571958    10.305462 
+    8350    4.7291561    4.7391636    9.0768372 
+    8400    5.3262934    5.8221591    11.065466 
+    8450    4.7583026    5.2282086    10.291955 
+    8500    4.9174536    4.5701979    10.140444 
+    8550     4.459922    4.8293188    9.4438719 
+    8600    4.7962584    4.5811071    9.5158666 
+    8650    4.6097275     4.431952    9.4350505 
+    8700     5.166554    5.2000584    11.162202 
+    8750    5.2353596    5.1668944    10.829751 
+    8800    5.3150111    4.9983333    9.8402224 
+    8850    4.9571197    4.9196589    9.4880549 
+    8900     4.902223    4.7539187    10.002425 
+    8950    4.9531983    5.0517321    9.7838444 
+    9000    5.4003802    4.9900303    10.550808 
+    9050    4.9254643    5.0678701     10.24321 
+    9100    4.9902263    4.9056928    9.0326566 
+    9150    5.1003677    5.1555374    10.049056 
+    9200    5.2358131    5.5834504    10.354698 
+    9250    5.5781649    5.1188429    10.361369 
+    9300    5.2100192    5.0737267    10.074694 
+    9350    5.1462976    4.8010759    9.0279769 
+    9400    5.0177693    5.4890092     10.03612 
+    9450    5.2507957     5.541141    10.639854 
+    9500     5.836784    4.9498236    10.288015 
+    9550    5.4698183    5.8761209    10.979924 
+    9600    5.0534023    5.0043428    10.436057 
+    9650    4.3619773    4.5413125     9.047513 
+    9700    4.3777508    4.7902251    8.9501908 
+    9750    4.6851974    4.3152085     8.825764 
+    9800    5.7312665     4.850913    10.247637 
+    9850    6.2290614    5.5480801    10.934907 
+    9900    6.0316892    6.1098926    11.562223 
+    9950    5.6370814    5.3933342    11.148805 
+   10000    4.6429923    5.0853156    9.4267693 
+Loop time of 3.06414 on 4 procs for 10000 steps with 500 atoms
+
+Performance: 1409858.531 tau/day, 3263.561 timesteps/s
+99.4% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.27954    | 0.30916    | 0.3866     |   8.1 | 10.09
+Neigh   | 1.0657     | 1.129      | 1.2853     |   8.5 | 36.85
+Comm    | 0.56334    | 0.80526    | 0.89816    |  15.6 | 26.28
+Output  | 0.0032749  | 0.0041527  | 0.006541   |   2.1 |  0.14
+Modify  | 0.6522     | 0.67016    | 0.69843    |   2.1 | 21.87
+Other   |            | 0.1464     |            |       |  4.78
+
+Nlocal:    125 ave 130 max 122 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+Nghost:    459.75 ave 470 max 448 min
+Histogram: 1 0 0 0 0 1 1 0 0 1
+Neighs:    837 ave 867 max 811 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+
+Total # of neighbors = 3348
+Ave neighs/atom = 6.696
+Neighbor list builds = 4589
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:03
diff --git a/examples/USER/uef/nvt_uniaxial/data.wca b/examples/USER/uef/nvt_uniaxial/data.wca
new file mode 100644
index 0000000000000000000000000000000000000000..889ba4d2f2a8d2cb3892bda144544f23140ccc03
--- /dev/null
+++ b/examples/USER/uef/nvt_uniaxial/data.wca
@@ -0,0 +1,1022 @@
+LAMMPS data file via write_data, version 14 May 2016, timestep = 400000
+
+500 atoms
+1 atom types
+
+0.0000000000000000e+00 8.3979809569125372e+00 xlo xhi
+0.0000000000000000e+00 8.3979809569125372e+00 ylo yhi
+0.0000000000000000e+00 8.3979809569125372e+00 zlo zhi
+
+Masses
+
+1 1
+
+Pair Coeffs # lj/cut
+
+1 1 1
+
+Atoms # atomic
+
+7 1 2.4137737201272674e-01 2.0017433398687118e-01 3.6157393311703534e-02 2 1 2
+398 1 1.6739594898193126e+00 3.0313452824803544e-01 5.7986723161362197e-01 -3 0 1
+173 1 6.1370793327804556e+00 4.4978954867119525e-01 3.6568951377817088e-01 -4 1 0
+462 1 7.8754716784931862e+00 5.2908038473333074e-01 7.9185633743762940e-01 1 1 -2
+383 1 6.5373096080170212e-01 1.5337525983981986e+00 6.3208419126059423e-01 2 0 1
+288 1 1.9354124990672374e+00 1.3318105136786291e+00 3.6211635210084403e-01 -1 1 0
+303 1 2.9661308460819318e+00 1.1444058564802859e+00 6.5604777151148241e-01 -2 2 1
+491 1 7.1514675802966758e+00 1.3588685826107376e+00 1.2875068928885325e-01 -1 0 0
+187 1 3.0670386025081497e-01 2.4850915964494620e+00 7.0944157374329464e-02 0 -1 0
+163 1 1.2805212773629451e+00 2.4034621328433090e+00 2.0184086197146742e-01 0 1 -1
+345 1 2.5952244948945173e+00 2.3405536448220743e+00 3.0019149048630317e-01 -3 2 1
+447 1 3.9059163101716741e+00 1.6849179478858272e+00 6.0306488750926446e-01 -2 3 -4
+108 1 4.9215629854759335e+00 2.5174843149522088e+00 6.2779912940916158e-02 1 0 -1
+427 1 6.3978861821382305e+00 1.9201774263431104e+00 4.3981049730412797e-01 -1 -2 -3
+96 1 7.4061541304605534e+00 2.4828308207907996e+00 2.4705543772216432e-01 0 -3 2
+494 1 8.0759482343563711e+00 1.6914241739458744e+00 6.8482420189075921e-01 -3 1 2
+360 1 3.1886448226656765e+00 3.1126224743956139e+00 5.4052217066940123e-01 1 1 -2
+179 1 4.2399500260508116e+00 2.8640916432630559e+00 7.5278462735962870e-01 -1 1 1
+136 1 6.2603021059339365e+00 3.2638426060411403e+00 9.9163685662983045e-02 -2 0 3
+311 1 8.0699058291845773e+00 3.1228722160017734e+00 7.5458759573094458e-01 -3 -3 3
+242 1 8.1363443818268044e-02 3.8061968817045986e+00 5.4644298839992535e-02 1 1 1
+451 1 1.9291602479953753e+00 3.5896469823727863e+00 2.3008537852985023e-01 -1 0 0
+118 1 5.3566912079946780e+00 3.5234509548370374e+00 6.7801317822012042e-01 1 -3 -1
+200 1 6.7588369512940338e-01 6.1719413314115790e-01 9.9275606042642373e-01 -1 2 3
+23 1 2.1873327028525060e+00 4.2754803645009956e-02 1.4143560341384132e+00 -3 -1 0
+30 1 3.9592898204338112e+00 6.8093268022015185e-01 1.2987004742604558e+00 -1 -2 -2
+26 1 5.1175076813830467e+00 5.1869033157649591e-01 9.5797839381311178e-01 0 1 2
+452 1 6.5582763761917411e+00 3.4514602671261341e-01 1.2368598815410630e+00 -2 0 -2
+439 1 8.3865135868155445e+00 1.4825845267600596e-01 1.6737363681796917e+00 -2 3 1
+492 1 1.6645816560197648e+00 9.7557398318995125e-01 1.3973497820249918e+00 -2 3 1
+287 1 2.8845951500519784e+00 9.2974386224238703e-01 1.6104516406361071e+00 2 -1 2
+87 1 5.9282888423658395e+00 1.2716232081523247e+00 1.1744299163086194e+00 -1 2 -1
+191 1 7.0465887128985001e+00 1.3447129135151838e+00 1.1378718594147066e+00 -1 2 0
+316 1 6.6785856436835267e-01 2.4243200282222617e+00 1.1253836626025919e+00 0 1 0
+176 1 1.8165460632802826e+00 2.1437252128100086e+00 1.0868234266569741e+00 1 1 1
+244 1 3.5041229236927576e+00 2.3924032537771125e+00 1.3551249499971583e+00 -3 1 4
+463 1 4.9238071200835165e+00 1.7488964460038467e+00 8.7393010597649756e-01 -2 2 1
+101 1 4.7502017837518187e+00 2.5070270251466997e+00 1.6042073679153410e+00 -3 0 -2
+456 1 6.7262574842655534e+00 2.1937608587635533e+00 1.5819520420856528e+00 0 2 3
+90 1 7.8106680805872131e+00 2.3465270292028992e+00 1.4066520475037838e+00 0 -4 1
+457 1 2.2543678933961773e+00 3.1652213402916889e+00 1.1451988834668292e+00 -1 0 0
+344 1 5.9751963550424136e+00 2.8740523385222234e+00 1.2530570351392452e+00 0 1 -1
+399 1 8.2525481582297566e-01 3.4796551803028120e+00 1.0452539946382762e+00 1 -1 -1
+472 1 2.5500022667255768e+00 4.1144011701979659e+00 1.1937493716055980e+00 0 1 2
+364 1 3.2074279852156709e+00 3.3909867375667604e+00 1.5492265188845586e+00 -1 0 0
+333 1 4.2063776055408209e+00 3.7436012461483292e+00 1.2983951160694676e+00 -1 1 1
+223 1 6.0720407506223024e+00 3.9437056464141231e+00 1.5634726007362729e+00 0 0 1
+277 1 6.8850289844945918e+00 3.5744109988378070e+00 1.1843729982426427e+00 2 0 0
+487 1 8.0236797280148657e+00 4.0759691449476652e+00 1.1816920447826709e+00 -1 0 5
+202 1 1.1425010515906946e+00 1.1629395296284512e-02 2.5086432758529211e+00 -1 -2 0
+139 1 2.3147555111337756e+00 1.8665688332261610e-01 2.4220734387693037e+00 -5 0 1
+218 1 3.5367098631556342e+00 6.2760250051756761e-01 2.4478818232869410e+00 1 0 -2
+10 1 4.3729774450168737e+00 1.6632191047636544e-01 2.1079853667170236e+00 -2 -1 -2
+64 1 5.7303792911825742e+00 2.9006680684615282e-01 1.9605629224377070e+00 -2 1 2
+40 1 6.5824557178778531e-01 8.5028950299011019e-01 2.1496168105059708e+00 2 1 1
+328 1 1.5589817713112594e-01 1.6225523918451032e+00 1.7310231111691350e+00 3 0 0
+292 1 2.1314993621742819e+00 1.2260233186264009e+00 2.3652294967955245e+00 0 0 -1
+245 1 4.6666032401598603e+00 1.5042368206958292e+00 1.9142905720694421e+00 -3 1 -1
+148 1 6.7720428171922631e+00 8.7008110148972428e-01 2.0934267451930806e+00 0 0 1
+435 1 7.7341388550057273e+00 9.1983544373491155e-01 1.8223856537522831e+00 -2 -1 1
+301 1 2.9295611364078922e-01 2.4942786265027763e+00 2.2355337509297120e+00 -2 -3 -2
+476 1 1.3648021789963285e+00 1.8605609454926342e+00 2.0851101905225424e+00 1 0 -1
+310 1 2.6428836469132526e+00 2.1636738846129742e+00 1.7827293416008847e+00 3 2 0
+137 1 3.6368685754799719e+00 1.7198335009724290e+00 2.1014306554853008e+00 1 -1 1
+434 1 5.6400362255303138e+00 2.0264029757942144e+00 1.7648993119896872e+00 0 -1 0
+184 1 7.6228520959849160e+00 2.1704001606029473e+00 2.4690779972373509e+00 0 1 -1
+5 1 1.5547914352159820e+00 3.0246432147284117e+00 1.9341058666944422e+00 5 1 -2
+41 1 3.5394530649909939e+00 2.8672493763709368e+00 2.3721754433370239e+00 0 0 0
+410 1 5.2622202291807252e+00 3.3532222678989076e+00 2.0249715425869179e+00 2 1 1
+418 1 6.2986243250817040e+00 2.9042203242504363e+00 2.3450580832445986e+00 -1 2 0
+34 1 6.5845359986575269e-01 3.5330771386953552e+00 2.1708302646106170e+00 -2 3 1
+67 1 2.2321109286615104e+00 3.8391986294826608e+00 2.3598318070524229e+00 1 -1 0
+390 1 4.4001972293445766e+00 3.7784400898230768e+00 2.3530190506208175e+00 1 2 -1
+125 1 6.8584035481270371e+00 3.7859398002695177e+00 2.2262509649550242e+00 -3 1 3
+105 1 8.0142836210079444e+00 3.4253500628703644e+00 1.9822510084209746e+00 1 0 -2
+274 1 4.5713678557713822e-01 5.4244335571632307e-01 3.0849174937154351e+00 1 -2 1
+314 1 1.8447059547300777e+00 1.1692544876557610e-01 3.3199499683474132e+00 2 0 -1
+408 1 3.1814603728989130e+00 7.4387930556111925e-02 3.3119410401147560e+00 -1 1 1
+142 1 5.0629800765951494e+00 4.5529434814892644e-01 2.8994044435235393e+00 2 -1 -3
+25 1 6.1307890615815195e+00 4.8707110399724851e-01 2.9156227033318936e+00 0 2 -4
+458 1 7.5149810474983081e+00 4.9072663758191898e-01 2.7754564040841219e+00 2 -4 0
+165 1 1.3117326379233891e+00 1.1343392736952256e+00 3.1076574691841947e+00 -2 2 2
+421 1 2.7633861789985827e+00 9.5478806243591052e-01 3.1957876340691875e+00 -1 -3 1
+151 1 4.7220243778881930e+00 1.4282004346636548e+00 3.2642378377455432e+00 0 0 -1
+162 1 5.6468847674841811e+00 1.3364063405496600e+00 2.5903751818453160e+00 4 -4 -2
+120 1 8.2129099591176686e+00 1.3575499019485984e+00 2.6634845511723606e+00 0 0 0
+448 1 8.8805142887530297e-01 2.2694079305496020e+00 3.1897834611367313e+00 -1 1 3
+50 1 2.9687835700409062e+00 1.9135157390691884e+00 2.8548074734572992e+00 -3 0 1
+443 1 4.0462687470728396e+00 2.3330394967344139e+00 3.1667340148022216e+00 0 1 -2
+482 1 4.9427462157614270e+00 2.1611442624383619e+00 2.6370759110090005e+00 0 3 3
+407 1 5.7753726348779013e+00 2.4435329312939671e+00 3.1756094964019836e+00 0 1 -1
+100 1 6.6610073695947598e+00 1.9476592422501362e+00 2.6046914024931409e+00 -2 -2 1
+99 1 3.5098260053604374e-01 3.2489377905726768e+00 3.1253385231740438e+00 -1 1 -1
+107 1 1.3942178307183000e+00 3.1914627339242005e+00 2.9813408440396350e+00 2 0 1
+480 1 2.3066257454835091e+00 2.6794898513136354e+00 2.7495660819550110e+00 -1 -2 -1
+115 1 3.1214377398204638e+00 2.9223083977660864e+00 3.3233156913824242e+00 1 2 1
+278 1 5.0520729931158854e+00 3.1424127338010743e+00 3.1175324674801042e+00 0 0 -1
+206 1 7.7081484276756580e+00 3.0580419350207482e+00 3.0362913717240096e+00 0 -1 -4
+4 1 1.3000434256419220e+00 4.1733384323360649e+00 2.6659576195319934e+00 1 0 2
+214 1 3.2669228431804429e+00 3.8002115979716740e+00 2.6408106015701289e+00 0 0 0
+65 1 4.0280349657536760e+00 3.3861374196314542e+00 3.2331032594455227e+00 2 1 2
+490 1 5.8571557116011554e+00 3.8735942685941813e+00 2.8613897619661586e+00 -1 -2 0
+230 1 2.4828380364158602e+00 5.5335473677805791e-01 4.1950137944148906e+00 2 -1 3
+234 1 4.2752320357614213e+00 1.4212800762204394e-01 3.7635975156407624e+00 -2 1 -4
+92 1 5.6957646578006917e+00 2.5374658514218495e-02 3.8304068113906884e+00 0 -1 0
+103 1 7.0230616578295741e+00 4.6755655210711161e-01 3.8662999072020967e+00 1 -2 1
+129 1 8.1671299320274162e+00 5.7067862198193264e-01 4.0702212708530077e+00 -1 -2 0
+483 1 3.2862260723228903e-01 1.4557716141563948e+00 3.6577409481728278e+00 -3 4 -1
+110 1 1.5047240722693447e+00 1.0396158231062973e+00 4.0843832165787424e+00 -2 -1 0
+126 1 3.7388270640303456e+00 1.1962750537841655e+00 3.4335639532998909e+00 3 0 0
+276 1 5.6087889039130960e+00 1.1358153696317190e+00 3.7428282467743927e+00 0 2 1
+294 1 6.5127919732673050e+00 1.4345197299837358e+00 3.4448769173070444e+00 -2 0 2
+417 1 7.5734653423068021e+00 1.3756634588591667e+00 3.5078056411981438e+00 -1 1 0
+102 1 1.1107713360470239e+00 2.0167473232388695e+00 4.1576658165713738e+00 -2 0 0
+228 1 2.0072330566273600e+00 1.8105875233272237e+00 3.4619908126078429e+00 2 0 -1
+2 1 3.2101113937010530e+00 2.0183489554560774e+00 3.8822820800419566e+00 0 1 -1
+222 1 4.2730523271288350e+00 1.9092752363315002e+00 4.1553216684001049e+00 1 -1 0
+332 1 6.5042309397343736e+00 2.2214607151080448e+00 4.1057034120156182e+00 0 1 3
+275 1 7.5099659540449100e+00 2.2790896507455778e+00 4.1566328430638730e+00 -1 0 -1
+243 1 8.2736142057938107e+00 2.3393256795577462e+00 3.4990413844829384e+00 0 0 -2
+394 1 5.8512440497757878e-01 2.8807282965119669e+00 3.9612614891397042e+00 1 3 1
+325 1 2.1875714466797338e+00 2.8726101452768167e+00 3.8147636089102748e+00 0 2 -1
+468 1 5.6059790271973755e+00 2.9975764588671621e+00 3.9816014902843078e+00 0 1 0
+113 1 6.5849924400139104e+00 3.1808065990344776e+00 3.4367361163256929e+00 0 0 1
+47 1 1.0081753047752569e+00 3.9556172416934308e+00 3.8502376136806813e+00 1 -1 0
+474 1 1.9694475511677239e+00 3.9381337754040748e+00 3.4201230948176762e+00 4 1 -1
+343 1 3.0370114219017328e+00 3.9465351732349148e+00 3.7436387890882474e+00 0 -2 3
+257 1 6.0639634839200358e+00 3.9545122043987475e+00 3.7922755780058259e+00 1 3 -3
+422 1 7.3939236737263112e+00 3.9358351587357783e+00 3.4932390476416106e+00 -2 -1 -3
+317 1 3.5159353631335000e+00 5.3479576975233323e-01 4.4869163379906354e+00 -1 -2 -4
+322 1 5.5733359556605757e+00 4.8698103039818835e-01 4.7704783768556309e+00 1 6 -1
+453 1 7.3567123647601029e+00 8.3432787155128474e-02 4.8780449077720851e+00 -2 3 2
+82 1 5.8224871123849942e-01 1.1896491837492689e+00 4.7267276175254507e+00 3 0 -1
+402 1 2.4848673528490579e+00 1.6438582837634208e+00 4.4663750233867230e+00 0 2 3
+201 1 3.5370299676438193e+00 1.4874347892452509e+00 4.7752714735806165e+00 0 -2 -1
+159 1 4.7745775925936407e+00 9.0511891230699659e-01 4.2787903105333260e+00 0 -2 2
+239 1 6.3670232447984816e+00 1.2120460051587267e+00 4.4601763672019308e+00 0 1 -1
+340 1 7.6240687426290750e+00 1.3432987598476136e+00 4.6741720105351368e+00 -2 1 1
+246 1 6.4819140516389595e-02 2.1918155429312383e+00 4.6033031341679633e+00 0 2 1
+268 1 5.2454607296226170e+00 2.0016193334579642e+00 4.2366338410742728e+00 -2 2 0
+238 1 1.7670560071066166e+00 3.0774892421151585e+00 4.7759677543328642e+00 0 0 0
+183 1 2.7500175098986750e+00 2.8066272725663279e+00 4.6976729622871014e+00 -2 0 0
+393 1 3.6570205199594930e+00 3.1994600133365143e+00 4.2220986255821451e+00 1 2 -1
+177 1 4.6077861627416805e+00 2.8922144055644265e+00 4.5492372075807843e+00 0 -1 0
+97 1 2.5306051464072796e+00 3.9363146816844674e+00 4.9489842130852137e+00 2 0 1
+16 1 5.1924745493168265e+00 3.7357318418579575e+00 4.8717896643693246e+00 0 -2 -1
+15 1 6.1086100243278070e+00 4.1952083415168335e+00 4.9407045230806066e+00 -1 0 -4
+467 1 6.6872865628091098e+00 3.3657369472951393e+00 4.6943068780100674e+00 1 2 2
+404 1 8.0052336596808171e+00 3.5358005023997046e+00 4.5274309747477952e+00 2 2 -1
+190 1 9.4380374464947103e-01 4.4583992222855645e-01 5.4653306453144710e+00 1 0 1
+429 1 1.8525920421435695e+00 3.5762708549602490e-01 5.0946475568663319e+00 3 0 2
+356 1 3.9097878727975877e+00 6.1716052733039095e-01 5.4734744838905147e+00 -3 1 -1
+459 1 5.0297951477539122e+00 6.4682655744305417e-01 5.6718176557063691e+00 0 1 1
+140 1 6.3845220800541655e+00 6.6200714540119532e-01 5.4418985717149972e+00 2 -1 -2
+121 1 6.3608122327251349e-01 1.6422337064097718e+00 5.7379184381145940e+00 -2 -1 -1
+76 1 2.6863089369401427e+00 9.2826845731889596e-01 5.1620994403726908e+00 -1 -1 -1
+24 1 4.4731219011860990e+00 1.5605471103773427e+00 5.2523372789875760e+00 0 -1 -1
+215 1 5.5638567276602870e+00 1.5540980050800413e+00 5.2788517723209116e+00 2 1 0
+406 1 7.1965017439495353e+00 1.2927814031197866e+00 5.6667263284723859e+00 0 2 -2
+495 1 8.1400977173034548e+00 9.1998286287268760e-01 5.5427200454151757e+00 6 1 0
+256 1 1.5858394354874155e+00 1.8927406862702940e+00 5.2157829401895270e+00 4 0 1
+348 1 2.8144079358075680e+00 2.0457885486401644e+00 5.4765038200952452e+00 1 -1 2
+18 1 3.7959072614405218e+00 2.4151932066273840e+00 5.1090004037855792e+00 0 -2 -3
+259 1 4.8058181463022738e+00 2.5068639859502841e+00 5.5173228168937900e+00 0 0 1
+261 1 5.9067727879570278e+00 2.4818772803689844e+00 5.0470051254431221e+00 0 0 -3
+433 1 6.6238775496086628e+00 2.4655070126810084e+00 5.8390299036823690e+00 -4 1 0
+119 1 7.4961811313748150e+00 2.2560858298720374e+00 5.2761246161627531e+00 0 -2 0
+181 1 8.7803165796510541e-01 2.5368621724963174e+00 5.1009676967579534e+00 3 -4 -2
+392 1 1.3563447389359846e+00 3.2004762283973394e+00 5.8237816659365569e+00 0 1 -1
+436 1 2.3946011665804514e+00 3.0278168308484168e+00 5.6997814720410966e+00 1 1 2
+210 1 5.9632298308600022e+00 3.3253508577291475e+00 5.7908436280268685e+00 0 -1 -2
+409 1 8.2954742717592467e+00 2.8784369153928258e+00 5.3508660402584347e+00 0 0 0
+437 1 7.8285811502936531e-01 3.5972247646914322e+00 5.1173576054548029e+00 -1 2 1
+114 1 3.5350435499222215e+00 3.3621891050940294e+00 5.4926280066777995e+00 0 -1 1
+431 1 7.2810459589983303e+00 4.1806346853520422e+00 5.1699487852752508e+00 0 -1 2
+296 1 2.7493301442805773e-01 9.8739001637521445e-02 6.5256735912844297e+00 2 0 -1
+477 1 2.7650505314326872e+00 5.9323041848923519e-01 6.1880566096904213e+00 -1 0 -2
+199 1 3.8449072993955937e+00 6.8021783868606100e-01 6.5252139926754094e+00 1 0 -1
+31 1 6.8196091485194961e+00 6.5962784856610523e-01 6.6694400071986637e+00 0 -2 -1
+149 1 7.6325509159351759e+00 1.3582393886225264e-01 5.8995743775761014e+00 0 1 -2
+216 1 9.7752025904362638e-01 1.4078822836813889e+00 6.6728311339234487e+00 2 0 1
+425 1 1.7005586383875275e+00 1.1280428057946255e+00 5.8993957526091192e+00 -2 5 2
+224 1 2.8675797989920913e+00 1.6348520503423174e+00 6.4743030869420952e+00 1 0 0
+352 1 3.7461129077349202e+00 1.6467836683144792e+00 5.9711819974315681e+00 0 2 1
+289 1 5.0071286046398447e+00 1.3773263732616476e+00 6.4710057868740503e+00 2 -1 2
+455 1 6.2266605172975309e+00 1.4566180649712972e+00 6.2025029141131824e+00 0 1 2
+298 1 7.2166245583854813e+00 1.5995071342823830e+00 6.6916596114412981e+00 -2 2 -2
+42 1 8.3616236989495309e+00 1.1193553859054726e+00 6.6834990220304640e+00 1 -1 -1
+80 1 1.7994758316879240e+00 2.0602535563487749e+00 6.2756102858385763e+00 1 -1 -2
+127 1 4.3098326176630284e+00 2.3959981453352577e+00 6.4129162374884245e+00 -3 2 -4
+232 1 5.4258898148163945e+00 2.3715461809208245e+00 6.2879862147301759e+00 1 1 -4
+368 1 8.2199446060636472e+00 2.1052670796140989e+00 6.3974575157449376e+00 -1 -1 -2
+486 1 6.9787179588597281e-01 2.5321347977480562e+00 6.5634477651369929e+00 1 -2 1
+209 1 1.7697882729109665e-01 3.3153947644324067e+00 6.2408024968147000e+00 -1 0 -2
+152 1 3.1591516689848946e+00 2.6297173021869003e+00 6.2970081587154914e+00 -1 3 2
+353 1 3.7775468094107074e+00 3.3545010039524996e+00 6.5652719772105916e+00 0 -2 1
+361 1 7.1568303860427589e+00 3.3402954049017803e+00 6.0487812936573579e+00 1 1 0
+391 1 1.8477885811255761e+00 3.9829271272845177e+00 6.5960694610186286e+00 2 0 -1
+464 1 2.8125899145281190e+00 3.9218837233739201e+00 6.1411786897515759e+00 -2 2 1
+497 1 4.5524588331963729e+00 3.4967999912980696e+00 5.9321819433594642e+00 1 -2 2
+498 1 2.0830532473234906e+00 1.1220036849985102e-01 7.1674325217309276e+00 3 3 -1
+264 1 3.1008573853993013e+00 7.6076625369238260e-01 7.2316131354301971e+00 1 1 2
+265 1 4.6342157774659736e+00 7.4471207241976944e-01 7.1556952519725225e+00 0 -1 1
+350 1 5.7838647346068255e+00 5.9645078928256690e-01 6.9763870948006925e+00 2 -2 0
+144 1 1.9122165150526358e+00 1.1329756208528616e+00 6.9185463020768152e+00 0 1 1
+17 1 3.7256587791934250e+00 1.6333740129131904e+00 7.1341698423474043e+00 0 0 0
+441 1 3.6115685474483350e-01 1.9101453221098965e+00 7.5303824824165133e+00 0 1 2
+266 1 1.4762191689510862e+00 2.4662450761248356e+00 7.5398542897976331e+00 -3 -2 -2
+375 1 2.3309589830172550e+00 2.0436669163885055e+00 7.1892097709618481e+00 0 1 0
+351 1 4.7143585284219309e+00 1.8434829864646978e+00 7.2780594229535307e+00 1 0 1
+254 1 5.9170265466448875e+00 1.9140788620193201e+00 7.1554204752492074e+00 0 1 0
+29 1 1.5191420610560982e-01 2.9876711650320327e+00 7.5404622831951533e+00 1 -2 0
+198 1 1.9302038195524280e+00 2.9894398797282253e+00 6.7310853131736357e+00 2 -1 -2
+197 1 3.3083005500744571e+00 2.6144599213348498e+00 7.4059907643060248e+00 1 0 2
+192 1 4.3810729888763209e+00 2.8557380429470860e+00 7.4007735378272006e+00 1 0 1
+220 1 5.1223945377780948e+00 3.3376038243952149e+00 6.7920010375935584e+00 0 0 1
+432 1 6.2458841824934863e+00 2.9482647398415351e+00 6.8353911959966540e+00 0 2 -1
+20 1 7.5153792083913968e+00 2.5459516140888447e+00 6.9548679851000408e+00 3 -1 -2
+63 1 1.0659006163620912e+00 3.4796280956085210e+00 7.1592343773906313e+00 0 1 -2
+446 1 7.1139268453604680e+00 4.0629892995134789e+00 7.0800362551664415e+00 1 -3 -2
+172 1 8.3244437231535144e+00 4.0795393733923691e+00 7.1857426258997332e+00 2 0 -1
+405 1 2.0158180111850474e-01 7.3000938410316307e-01 7.6191994505829879e+00 0 -3 0
+323 1 1.4017230037752237e+00 4.6050949606146502e-01 7.9006469000953414e+00 2 1 -1
+428 1 2.4287753885012338e+00 4.6438480937363963e-01 8.3560508824421493e+00 3 0 2
+280 1 3.6502596381287908e+00 7.8205464112991063e-01 8.2256571665042131e+00 3 1 -1
+54 1 5.2270258856938900e+00 5.3851997974995880e-01 8.1047016704593808e+00 -1 0 -5
+397 1 7.4044521194737944e+00 1.4880482327788007e-02 8.2278445496777337e+00 1 0 2
+279 1 1.2356833818326960e+00 1.5625251472088779e+00 7.9109615213928013e+00 0 4 -2
+134 1 2.5628613498785486e+00 1.5781156241932268e+00 7.9994516338459301e+00 0 1 -1
+236 1 4.5573192582583912e+00 1.4401963680745278e+00 8.1580516205465621e+00 0 3 -3
+493 1 6.1321864263285528e+00 9.9745261592364498e-01 7.9044637123188437e+00 0 3 -3
+346 1 7.4631508711766701e+00 8.7980227698742530e-01 7.6161632460447608e+00 2 -1 -1
+295 1 3.6674081936683844e+00 2.3602153939048316e+00 8.3119852957626765e+00 0 1 -2
+164 1 5.5460188609614756e+00 1.8259166833804881e+00 8.1712445391317772e+00 -2 -2 -1
+354 1 6.6872220974400829e+00 2.0300640265600358e+00 7.8561422683464270e+00 1 2 0
+449 1 7.9034553200360103e+00 1.8534144914629234e+00 8.0299472682703943e+00 -1 -3 0
+185 1 9.5448193278219684e-01 3.3578879558028460e+00 8.1917731250955708e+00 1 1 -1
+229 1 2.5099351180061946e+00 3.0097254013180406e+00 7.8296711975898541e+00 -2 2 -1
+135 1 5.5895093348111047e+00 2.7786209311728598e+00 7.5930433616327404e+00 -1 0 -2
+211 1 7.0529928021063730e+00 3.1919477500830000e+00 7.6370125321552074e+00 -2 2 1
+81 1 1.8872269150105381e+00 3.9351057043865243e+00 7.6163648141042426e+00 0 -1 1
+37 1 3.6614705509244851e+00 3.4915765263742262e+00 7.7267865376074960e+00 2 0 1
+355 1 4.5831332879360058e+00 3.8667909487066860e+00 8.3349240075214972e+00 1 -2 0
+141 1 5.4975686789962985e+00 3.7534665361901731e+00 7.8536976598839008e+00 1 3 1
+122 1 7.2999342247943373e+00 3.9738648524411286e+00 8.3865006440763370e+00 0 -1 -2
+327 1 3.2503689184711210e-01 4.6592524224991747e+00 8.2659471361477532e-01 0 -2 2
+386 1 1.1858359917362140e+00 4.2742921253409367e+00 2.8183822072239956e-01 -1 -2 5
+59 1 1.8702404230468521e+00 4.9717730076471947e+00 7.6874981917086671e-01 0 2 -3
+57 1 2.8167286953460633e+00 4.2569068178507328e+00 1.1953882254793591e-01 1 -1 1
+489 1 3.6302378764516323e+00 4.4521177967574310e+00 6.6729235313037527e-01 1 1 5
+326 1 4.9856357064009993e+00 4.5330987087989216e+00 5.2454407858907726e-01 -1 2 2
+359 1 5.9913033298564331e+00 4.3362327743167306e+00 3.7835915829420080e-01 2 -1 -1
+349 1 7.2426723526689933e+00 4.7181533524380885e+00 7.2707643667494415e-01 0 1 -2
+247 1 8.6433343008963215e-01 5.6823007167579762e+00 6.5224250416194052e-01 1 -3 2
+365 1 3.8233209759419231e+00 5.6939269324333672e+00 5.9607489711922135e-01 -1 -1 0
+382 1 5.1137068164970731e+00 5.5805604939667059e+00 1.8276059551769425e-01 -1 -2 1
+130 1 8.2482820181805554e+00 5.5968590423898927e+00 4.4653409437071251e-01 -1 -1 2
+471 1 1.7119665678072469e+00 5.8953695416344436e+00 4.5930053740843135e-02 -2 0 1
+21 1 2.5641267140454049e+00 6.3080369711057624e+00 5.3936073769936865e-01 1 -2 0
+499 1 4.7164689695839286e+00 6.6465500795405017e+00 1.6611623106065540e-01 -1 -1 1
+253 1 5.9043290983731245e+00 6.1331552003521068e+00 7.5404520106110218e-01 3 -1 0
+302 1 6.9564884663410282e+00 6.1640675729464434e+00 5.7033134187391044e-01 0 -1 -1
+324 1 8.1992983060509594e+00 6.5972265856779417e+00 7.7494747996129210e-01 1 -3 2
+154 1 1.8996589089571863e+00 7.1193783454022102e+00 5.1404963792269565e-01 -3 -1 -1
+284 1 3.0007790559640597e+00 7.2198187648711993e+00 2.3642486802169203e-02 0 0 0
+161 1 3.8133207311484072e+00 7.3201299304675667e+00 5.0202507839899535e-01 -2 -4 -2
+112 1 5.6724794675417822e+00 7.1979165692397187e+00 7.3233205047939343e-01 -1 -3 -2
+39 1 6.4178033104766934e+00 6.8891409921259603e+00 1.5315709015991064e-01 0 0 2
+269 1 7.4906317471214443e+00 6.7375050108085421e+00 4.8287801879068511e-02 3 1 0
+470 1 2.2822847309207428e-01 7.8917014242385868e+00 7.7675485207454809e-01 0 0 1
+84 1 1.3672512040847244e+00 7.9379711016796204e+00 1.6230344562139010e-02 1 0 -3
+381 1 2.6740213254054948e+00 8.0161072175846630e+00 5.8246256296805565e-01 -1 -1 1
+416 1 3.6213693754100857e+00 8.3576521057616233e+00 6.2331560400527297e-01 2 2 -1
+182 1 5.1660049409085866e+00 8.0423641799361949e+00 3.0675573970032322e-01 -2 0 -1
+347 1 6.5429973279348026e+00 7.8912499362162034e+00 4.9818784876300565e-01 -1 1 -2
+380 1 7.4990421428201417e+00 7.6756533739549013e+00 7.5184655726615468e-01 -3 0 0
+19 1 1.4643514526638863e+00 4.2228380130315317e+00 1.3843336624933156e+00 0 3 -1
+430 1 4.3057981012626918e+00 5.0291601875436083e+00 1.2152759600723888e+00 1 1 -2
+212 1 5.0477378389079224e+00 4.2727344176247888e+00 1.5457805058503180e+00 -2 -1 2
+479 1 7.9063585867776194e+00 4.9679041912501303e+00 1.6188677256489217e+00 1 3 -1
+217 1 2.5218672599222897e-01 5.8414577361250837e+00 1.5087288813722908e+00 2 1 2
+147 1 1.1545026869668784e+00 5.3345315151941692e+00 1.5114087897016817e+00 1 2 -1
+13 1 2.8751716646412713e+00 5.2058560720828826e+00 1.0454367061984737e+00 -2 1 -1
+170 1 5.1473087472466297e+00 5.7979187654554183e+00 1.4646868436401976e+00 1 2 0
+204 1 6.2817592200581442e+00 5.1972845711964553e+00 8.9956483663207842e-01 -1 -1 -1
+306 1 8.3865328816009488e-01 6.6369969836196416e+00 9.5403861420405511e-01 1 -1 3
+414 1 1.6815920771923107e+00 6.1626367729687450e+00 1.1744973520180511e+00 0 2 0
+79 1 3.3223472300560521e+00 6.2059256452802716e+00 1.3962300133956274e+00 0 0 0
+116 1 7.6213827835593815e+00 5.9109006810695996e+00 1.2032264335528675e+00 -2 -1 3
+231 1 2.7502476364406934e+00 7.3933926752228123e+00 1.4247539952768773e+00 3 -1 1
+171 1 4.7071880050572981e+00 6.8064285791523735e+00 1.2807974821888473e+00 0 -2 1
+273 1 7.0329000376929018e+00 6.8584131720717227e+00 1.6693583798609657e+00 -2 1 -1
+260 1 8.0496857982994889e+00 7.4547112671714686e+00 1.6374112984134654e+00 3 -1 -2
+43 1 1.0723712760276345e+00 8.0330657119523838e+00 1.4245919245059160e+00 -2 1 0
+208 1 4.3910624755767662e+00 7.9590156044378260e+00 1.1638330407357564e+00 -2 -2 -1
+226 1 5.5077902922207063e+00 8.0548948337249193e+00 1.2764179622713128e+00 0 0 2
+370 1 6.4142184578798505e+00 7.6748965104033502e+00 1.5118878610721513e+00 1 1 1
+85 1 4.9493682223295465e-01 4.4925581352788218e+00 1.7962077237252332e+00 2 0 0
+213 1 2.1575495906041722e+00 4.9142597626853277e+00 1.8811900528366003e+00 0 3 0
+299 1 3.3659976969419567e+00 4.3662606017243588e+00 1.7255850309353928e+00 3 -1 -2
+249 1 4.7082373488231886e+00 4.9470082900514569e+00 2.3877787475724284e+00 0 0 1
+258 1 5.7775488222451887e+00 4.8696821219175135e+00 2.2927307255193869e+00 -1 0 -1
+388 1 6.8398302583746604e+00 4.7642336931962808e+00 1.8670074890749995e+00 1 -4 -3
+91 1 7.7108281252011066e+00 4.3462922152329373e+00 2.4115614404022634e+00 2 -2 1
+415 1 1.3354264256961873e-01 5.2683808667768846e+00 2.4940378524658144e+00 0 3 0
+27 1 2.0564207334277329e+00 5.6999752852325889e+00 2.4901436865643838e+00 0 -2 -1
+465 1 3.6077777180133985e+00 5.3106225786683270e+00 2.2984375934854913e+00 1 4 2
+72 1 1.2115862831734683e+00 6.3271145377400213e+00 2.3088380865765914e+00 -1 -2 -1
+58 1 2.4730215910430200e+00 6.4108021967014599e+00 1.8872921058165837e+00 2 -2 2
+88 1 4.2468496124355051e+00 6.0000736368680991e+00 1.9689248186008255e+00 1 0 -1
+6 1 5.2824888171486872e+00 6.1177755165021708e+00 2.4648490036232666e+00 0 -3 1
+75 1 6.5540148621238368e+00 5.8837981825070473e+00 1.8049347572360399e+00 -3 0 4
+22 1 7.8390642986715990e+00 6.6390504342942007e+00 2.2941937725662940e+00 0 1 0
+128 1 5.0900588055916374e-01 6.9175984002222233e+00 1.8531623061329514e+00 -1 0 1
+193 1 1.7616411201473283e+00 7.3777929885216569e+00 1.8819221158557660e+00 -2 0 0
+389 1 3.8054852300865960e+00 7.1908689074118346e+00 1.6897648139570383e+00 1 -1 -1
+241 1 3.3304867419800805e+00 6.7201719117843943e+00 2.3881392654044515e+00 0 1 0
+14 1 5.6091745306383807e+00 6.8869789388403264e+00 1.7385824045568810e+00 0 -1 0
+307 1 2.6028552221612093e+00 7.5983757451277896e+00 2.5134014261731092e+00 -1 1 0
+401 1 3.3924470446908370e+00 8.2185878004411617e+00 1.7224400376656741e+00 -3 -2 2
+248 1 4.8372436612670020e+00 7.6440956091999794e+00 2.0947200376368853e+00 2 -3 0
+335 1 7.0823280457514208e+00 7.5905134700352388e+00 2.4694687185743178e+00 -1 0 0
+281 1 7.4000691269637358e+00 8.3890728034516613e+00 1.8965880690997261e+00 -1 -1 0
+33 1 2.6215904333511340e+00 4.6944190932346643e+00 2.8031190571610427e+00 1 1 1
+221 1 3.7745777664099962e+00 4.5666560881588492e+00 3.0592511133135365e+00 0 -1 1
+251 1 6.7715334943605834e+00 4.6401458096684580e+00 2.8510683358464712e+00 1 -2 -2
+138 1 1.1922779528936784e+00 5.1973855619084013e+00 2.5689556972218521e+00 0 1 0
+73 1 4.3331751132449305e+00 5.7904710852531096e+00 2.9618985356431899e+00 0 -2 2
+237 1 6.2300560736724488e+00 5.7475979505901522e+00 2.7248929404434143e+00 3 0 0
+77 1 7.3913602469000059e+00 5.6780386160747609e+00 2.6234944353061729e+00 -1 -1 3
+339 1 2.9546703425028309e-01 6.2132195219954411e+00 2.7867557714215443e+00 0 0 2
+69 1 1.3562182919504069e+00 6.0134582221288868e+00 3.2785943115156346e+00 0 -2 1
+150 1 2.9668876558472732e+00 5.9472670316673391e+00 2.8936146954206632e+00 3 -2 -1
+219 1 1.1608118939764323e+00 7.3228681938207032e+00 2.7126116817865631e+00 0 -3 -1
+9 1 2.0627854045350777e+00 6.7454186793237145e+00 2.9360002216641958e+00 0 -2 -2
+3 1 2.9445094497077151e+00 7.1384526732484925e+00 3.3587771406211751e+00 2 -1 -2
+93 1 3.8294061315771235e+00 7.5378392781893275e+00 2.8380550678413003e+00 -1 2 0
+68 1 4.5306997862577427e+00 6.7390306077267583e+00 2.7235217150491495e+00 1 1 3
+35 1 5.4701910601523354e+00 6.9880160912814793e+00 3.3190052755169512e+00 0 0 -2
+174 1 6.2873908725084267e+00 6.7805871501360224e+00 2.7219816088319484e+00 -1 0 2
+369 1 1.7965435829527715e-01 8.0388194202620209e+00 2.5940286664394834e+00 -3 -2 1
+469 1 4.9516921171039039e+00 7.8826764349301754e+00 3.0530014528334970e+00 0 -2 -1
+270 1 5.8608285132223106e+00 7.6946678024150144e+00 2.6778659829007898e+00 0 -1 -2
+196 1 7.6768577919393455e-01 4.8792664640921366e+00 3.5903070116215088e+00 3 -1 1
+235 1 4.2285310787746502e+00 4.2638690385284841e+00 4.1560966227762339e+00 1 1 1
+104 1 4.9767847616154004e+00 4.2048445393860883e+00 3.3962447019771562e+00 -1 -2 4
+363 1 5.8883238331527643e+00 4.9482373301020921e+00 3.4294316309792547e+00 0 0 3
+440 1 8.3695482926311691e+00 4.2129368246316421e+00 3.4018022112738615e+00 -1 2 1
+271 1 1.8521235523587734e+00 5.0753345126075313e+00 3.3807703470063983e+00 0 2 1
+385 1 3.2904276665596646e+00 5.1796485809465160e+00 3.7980367644038786e+00 -3 -1 0
+367 1 4.4116421156217953e+00 5.2602223360914149e+00 3.9453911592241386e+00 1 1 0
+8 1 5.3112651984610038e+00 5.7540585999932050e+00 3.4831459566385630e+00 3 1 0
+445 1 6.8973470747727381e+00 5.7304789961834581e+00 3.7043310817118127e+00 -2 -1 2
+95 1 8.3390772303693836e+00 5.5440620917552232e+00 3.7707087770664116e+00 -2 1 -3
+227 1 2.6907357337748437e+00 5.9773212885609901e+00 3.9377639031978284e+00 -1 -2 0
+45 1 3.9484717591019058e+00 6.4426795460159951e+00 3.6915638730283460e+00 0 1 -1
+62 1 6.2921228111843934e+00 6.5338531152740655e+00 3.8634903896998787e+00 1 0 -3
+309 1 7.4708902093515182e+00 6.5908652825436720e+00 3.6007172393059794e+00 0 -2 0
+70 1 1.1631280783050528e-01 7.0230287218135894e+00 3.4578008409773004e+00 -1 -2 2
+285 1 1.4425208756199777e+00 7.4192404188139784e+00 3.7903676842933867e+00 0 1 -1
+387 1 2.1212416378368082e+00 6.7926691438538640e+00 4.1965603113934069e+00 1 -2 -1
+484 1 4.6356188966878253e+00 7.3783035166039177e+00 4.0565969174155132e+00 0 -1 -1
+98 1 6.8865210518165370e+00 7.3140090488700187e+00 4.1918733358514384e+00 0 -2 -2
+282 1 6.6401301602074470e-01 7.9757076330778913e+00 3.4568068256572881e+00 0 0 2
+203 1 2.5721741898113022e+00 7.9369081377447577e+00 4.0156309667060688e+00 0 3 0
+189 1 3.6912207664687133e+00 7.6806375475121094e+00 3.9059717431750585e+00 0 -1 0
+267 1 6.5713653654298607e+00 7.8128923355065503e+00 3.4078724648171090e+00 -3 -3 2
+155 1 7.9603563078893247e+00 8.1399268391964696e+00 3.4070072411074963e+00 -2 -3 -1
+313 1 1.7289301235151016e+00 4.3022360027322257e+00 4.5514630316827649e+00 0 1 -2
+250 1 3.3811553057719541e+00 4.4820133651244189e+00 4.8062617116575304e+00 1 1 1
+49 1 5.2294045670482578e+00 4.6507731504297771e+00 4.3588420415784839e+00 -1 2 -1
+111 1 6.9650608689197728e+00 4.6666276346705935e+00 4.3879356756979719e+00 1 1 -1
+124 1 8.0933085752880096e+00 4.6382186065764790e+00 4.3125441053619298e+00 1 1 -1
+488 1 5.0933114935797996e-01 5.1359886989747769e+00 4.5948583401263194e+00 -3 1 -2
+48 1 1.5851307073885004e+00 5.5767207395220701e+00 4.2311661806996650e+00 2 -2 1
+123 1 2.5153837170534041e+00 5.0458650845269890e+00 4.5131929409242613e+00 -1 -1 -1
+423 1 5.8208012531410862e+00 5.4948336589690365e+00 4.5916227902397422e+00 2 1 2
+329 1 7.4993364070942770e+00 5.4866100873090584e+00 4.5194723854672301e+00 -2 -2 -2
+334 1 7.1205767430238509e-01 6.1834005542640256e+00 4.2284519502384050e+00 0 1 -1
+146 1 1.3925739327604780e+00 6.6428737862846399e+00 4.8806888882719424e+00 0 1 -2
+500 1 2.2236367185474757e+00 6.0145074663436873e+00 4.9428276989368829e+00 0 1 1
+46 1 3.3290620592760245e+00 5.9295614805296450e+00 4.9789176350871935e+00 1 -1 -3
+374 1 4.7036984817853460e+00 6.2326093994181564e+00 4.3534332596232463e+00 -2 3 2
+52 1 6.8604388860085521e+00 6.3207993870641257e+00 4.9959828655023273e+00 0 -1 -1
+466 1 8.0663702260053096e+00 6.4797694103942982e+00 4.3885769098322607e+00 0 -1 -1
+272 1 6.3857634591010626e-01 7.4306626388140797e+00 4.4685908728843007e+00 -1 0 -1
+331 1 3.1923815943971330e+00 6.8406621847783482e+00 4.3673687027144910e+00 -1 -2 0
+12 1 5.5841339297795631e+00 7.2882403606371549e+00 4.4466343965097535e+00 4 3 0
+178 1 7.5912726284672463e+00 7.0457277620611389e+00 5.0302483408547527e+00 1 -2 0
+376 1 3.5187241629329796e-01 8.3977706803073371e+00 4.6278412560289910e+00 1 -5 2
+117 1 1.3379826357324891e+00 8.3239351364630743e+00 4.2945291322530599e+00 3 0 1
+419 1 1.8896223988418746e+00 7.6393579746671740e+00 4.8168734901107717e+00 0 1 2
+379 1 3.9801436864751545e+00 7.7556819889833708e+00 4.9922836416209275e+00 -1 -2 0
+53 1 4.7488025467298804e+00 8.2113497019933241e+00 4.7088252949914393e+00 -3 0 -1
+28 1 6.4645893658779787e+00 8.2802587137451660e+00 4.5908064174413035e+00 -1 0 -2
+300 1 8.0497419370896441e+00 7.6196002867575432e+00 4.2909429383935933e+00 0 3 0
+83 1 1.5533997706565803e+00 4.2394383012204173e+00 5.5726999917011435e+00 0 0 -1
+304 1 2.5788446900543724e+00 5.0166498820006939e+00 5.6119573167673691e+00 0 -1 0
+51 1 4.2443804124641717e+00 4.2352637746580450e+00 5.1927633388220640e+00 -2 -3 1
+180 1 8.3861058182094546e+00 4.2682396578102644e+00 5.2739914655295381e+00 2 -2 0
+60 1 1.6015559581031933e+00 5.1463292330916399e+00 5.1677022459932900e+00 3 -3 0
+255 1 3.9377408204864559e+00 5.5446289337866919e+00 5.6880007643189954e+00 -1 1 -1
+336 1 4.8593163697287496e+00 5.2245778091909907e+00 5.0488368666183758e+00 0 -1 0
+74 1 5.8562368106306470e+00 5.1343487602136690e+00 5.4844996621256241e+00 -1 -1 -1
+11 1 6.8757956599703460e+00 5.1962144116573894e+00 5.2833696751528123e+00 1 -1 1
+106 1 7.8813740148695439e+00 5.1326569274050362e+00 5.5073989898629776e+00 -2 0 1
+342 1 6.9861903942379322e-01 6.2397324279420259e+00 5.5554818983126264e+00 1 2 1
+366 1 1.5648860489287066e+00 5.8898927909510999e+00 5.8373116466465662e+00 -2 1 1
+153 1 2.7314006683241487e+00 6.0747252624615822e+00 5.8658792109688322e+00 2 0 0
+384 1 3.7649825100367740e+00 6.6140228235604797e+00 5.7068894568556274e+00 3 0 -1
+444 1 5.5919895424131791e+00 6.4007885907393334e+00 5.0609268438363237e+00 0 0 1
+169 1 8.1101285040334883e+00 6.0263607659074259e+00 5.3262646023669920e+00 2 -3 2
+312 1 1.5049751572049341e-01 7.0899457593186419e+00 5.3993285616214672e+00 -4 -1 2
+131 1 2.1405833530478784e+00 6.8932472731161107e+00 5.7016479425245992e+00 -1 -2 -5
+290 1 2.9965099079972810e+00 7.1703954961127128e+00 5.3705836189910157e+00 3 2 0
+240 1 4.7341951239220323e+00 7.1100411829110195e+00 5.1242936547927025e+00 -1 -4 5
+403 1 1.1346681749692125e+00 7.6627110490212882e+00 5.5800616996766177e+00 -3 2 3
+438 1 2.6845809734218404e+00 8.2016169024289791e+00 5.0687325841194584e+00 0 1 1
+420 1 4.5890023008209200e+00 8.1137042645946345e+00 5.6817522441295134e+00 -1 -3 -3
+175 1 5.5503185230040479e+00 7.9962207684468503e+00 5.3390442156703726e+00 -1 -2 1
+305 1 6.7862386693055941e+00 7.6108734242812508e+00 5.4748415197806564e+00 -1 0 0
+341 1 7.4912815420537060e-01 4.3291768420078673e+00 6.1635194432999105e+00 -3 0 0
+291 1 3.3895079643780628e+00 4.7034241504608403e+00 6.1615817441829774e+00 -1 0 1
+372 1 4.6184126650372983e+00 4.6292373343220516e+00 6.3125880555070495e+00 1 -1 -2
+293 1 5.5228203863122962e+00 4.2760181362826257e+00 5.8887719440294752e+00 2 2 2
+461 1 6.6292898049431850e+00 4.3535966489510214e+00 5.9130834363303126e+00 0 -1 -1
+263 1 7.7259268091094802e+00 4.4631499768470926e+00 6.3337285974251110e+00 -3 0 0
+473 1 5.8703225638324330e-01 5.3945075957215893e+00 6.1889975824903258e+00 1 -2 0
+166 1 1.6857123760309740e+00 5.0889285259544463e+00 6.6009674185523135e+00 2 2 2
+132 1 6.2122393744706681e+00 5.4126385838431412e+00 6.6911128610756689e+00 -1 2 1
+32 1 7.1447545314606753e+00 5.2817185569927805e+00 6.2574893275776935e+00 -1 1 0
+475 1 3.1727782520787395e-01 6.3991068893590928e+00 6.5410008153956287e+00 -1 2 0
+283 1 3.2886765713157047e+00 6.4577609617474216e+00 6.6450211825759427e+00 -2 5 -3
+205 1 4.9096514414971573e+00 6.2573093659948649e+00 5.9509143564095108e+00 1 1 0
+158 1 6.0709604726562620e+00 6.0618280221451721e+00 5.9241072976562341e+00 2 -3 1
+1 1 6.8533858129590133e+00 6.5562074315723597e+00 6.5648993855676077e+00 -2 -1 -2
+485 1 3.8125590702509465e-01 7.4616693598979804e+00 6.3609885403345947e+00 0 0 1
+378 1 1.3081995382782592e+00 6.9194359283210023e+00 6.2966360730695206e+00 -1 1 2
+71 1 4.2456384497919455e+00 6.8188407430282236e+00 6.6601493363974615e+00 -1 -4 -4
+320 1 5.9836892193800049e+00 7.1597807912498039e+00 5.9293654675383980e+00 -1 -1 -4
+481 1 7.6691355446374123e+00 7.2329848679784536e+00 6.1548343866270958e+00 2 0 -4
+318 1 1.3774303149915195e+00 8.3818831122591373e+00 6.3319633954681702e+00 -1 3 3
+496 1 2.5904412000483283e+00 7.6460489519926949e+00 6.2257027413180230e+00 0 1 0
+319 1 3.7413059213345305e+00 7.5724262762451815e+00 6.0123092822785358e+00 -2 0 0
+338 1 5.3628717868866635e+00 8.0629328638364353e+00 6.4489666989888104e+00 0 0 2
+358 1 6.2532288322758314e+00 8.3160028104100370e+00 6.1106737733333700e+00 1 1 3
+225 1 7.1188694406828850e+00 8.0966599202958953e+00 6.7037213079900431e+00 0 0 1
+55 1 1.0372861368831470e+00 4.4696141893250934e+00 7.4049774120079697e+00 -1 2 -1
+362 1 2.7501402171762614e+00 4.4083355662417896e+00 7.3996418874276850e+00 1 -1 0
+36 1 3.7396318187704796e+00 4.4801126663955504e+00 7.1074085083223073e+00 2 0 -2
+133 1 4.7860445539287522e+00 4.2902205437459102e+00 7.3711360387569274e+00 -1 -3 -2
+412 1 5.8687821857033606e+00 4.2035753083199188e+00 6.8227572622514323e+00 -4 3 0
+89 1 9.1212528791654535e-02 5.3554487915969240e+00 7.1088625783964092e+00 0 -2 0
+38 1 1.2571311964388701e+00 5.7525434192639207e+00 7.3210178579367868e+00 -1 -5 -2
+337 1 2.7511266822038212e+00 5.4161467707559394e+00 6.9980055112843935e+00 1 -2 -1
+233 1 3.8756258886529467e+00 5.7508116329978627e+00 7.0533607828281815e+00 0 0 -1
+442 1 4.9084237493694953e+00 5.5326539633780083e+00 6.7666717819135362e+00 -4 -4 1
+143 1 5.5776680237268401e+00 5.1621486864820874e+00 7.4635021710959082e+00 1 2 -2
+194 1 7.1453664245542576e+00 5.0984438205910134e+00 7.3333812213368574e+00 0 2 0
+308 1 7.5489760640996861e+00 5.8433222820659871e+00 6.9295207313931844e+00 0 0 0
+286 1 2.1326326019163346e+00 6.2556170719743678e+00 6.7379074391662934e+00 2 0 -2
+61 1 5.6654689088882648e+00 6.2935208603774795e+00 6.9534505775907345e+00 -2 1 -1
+426 1 7.2244687144599562e-01 7.0806885112202815e+00 7.2261712082790313e+00 -1 1 -2
+450 1 1.7213783340257280e+00 7.5184636193370302e+00 6.9767252413839369e+00 4 1 1
+188 1 2.8213470931093725e+00 7.2342621084044723e+00 7.1806231926796817e+00 0 -2 -1
+94 1 3.8026924767351562e+00 7.3932541518378949e+00 7.4596688811177305e+00 -1 2 -2
+252 1 5.1481924139065534e+00 7.2239227196376481e+00 7.0808756518162852e+00 -2 1 -1
+321 1 6.2155610611385042e+00 7.5199920009866501e+00 7.0964075868161212e+00 1 -1 -3
+156 1 7.9944514874782193e+00 7.0252703915440122e+00 7.2541499279788821e+00 -2 1 0
+167 1 5.6917482122629082e-01 8.1417190335213725e+00 7.4962901784513347e+00 2 -2 -1
+330 1 3.3138446283251373e+00 8.2146300363312132e+00 6.8886408760800295e+00 1 -2 0
+195 1 4.3712446295554876e+00 7.8995454090647614e+00 6.7366600234979277e+00 -3 -1 -1
+78 1 5.1856313345157918e+00 8.2127013524100576e+00 7.4620544007464273e+00 0 -1 1
+157 1 7.9011697675935482e+00 8.1764884669087454e+00 7.4266272279794077e+00 -1 0 -1
+373 1 6.7132530681439118e-01 5.0292807223694753e+00 8.1874843860841438e+00 1 2 -2
+56 1 1.7078519119356448e+00 4.9344764637762015e+00 8.1280520422148399e+00 0 2 0
+315 1 3.6234275175752244e+00 4.9451657793378052e+00 8.0476764469560074e+00 -1 -2 0
+478 1 6.4485922344872577e+00 4.3096951707056732e+00 7.7984645181472576e+00 -3 -1 1
+109 1 8.0579925616842267e+00 4.7034410927383776e+00 8.0870676538969590e+00 -1 -2 0
+357 1 2.6884169416542560e+00 5.4427652520296803e+00 8.2033924998655703e+00 0 3 1
+454 1 4.5709826050185614e+00 5.3780110248528246e+00 7.7764192595368709e+00 1 -1 2
+86 1 6.2280039286639228e+00 5.4780735688869227e+00 8.2809905303020894e+00 -1 1 0
+262 1 7.2349855745224341e+00 5.4460551342647063e+00 8.3066978094955939e+00 0 -2 2
+160 1 6.0572932237795518e-01 6.1031638741150802e+00 8.0606222325454144e+00 2 -1 1
+168 1 2.6926304594044645e+00 6.3106981827881823e+00 7.6217004925278191e+00 -1 -3 0
+145 1 3.6817206885980216e+00 6.4794531849637744e+00 8.1206460236595284e+00 -1 -1 -2
+400 1 4.6165015313151514e+00 6.4768264075412638e+00 7.5742703650137084e+00 1 -2 -2
+395 1 5.5951136272170219e+00 6.3298964082146654e+00 7.9312370644766386e+00 0 0 -1
+297 1 6.6597873155694378e+00 6.3692836044578955e+00 7.6364845321455510e+00 1 -4 0
+207 1 8.0115395128671594e+00 5.9375044868831406e+00 7.8466324008781454e+00 -1 1 3
+460 1 6.6754980612335257e-01 7.0521823363262239e+00 8.3451452216672077e+00 -2 1 -3
+44 1 1.5985553198249884e+00 6.8250382119954756e+00 7.7469635466673052e+00 0 1 0
+377 1 4.7397084186898368e+00 7.4631435688157008e+00 8.0338331164055763e+00 0 -1 0
+411 1 5.8132963527118724e+00 7.4646663474419555e+00 8.0261071780587709e+00 2 -1 1
+396 1 7.0242838781762282e+00 7.3479893628886632e+00 7.6603703429086574e+00 2 3 0
+424 1 8.1516255787718581e+00 7.5530513248894016e+00 8.2349155971796755e+00 -2 0 0
+66 1 2.2362583327280716e+00 7.7449451931440727e+00 7.8847519662235594e+00 -1 -2 0
+371 1 3.1656541571472099e+00 8.2451918974189500e+00 8.0415029551628514e+00 1 -1 1
+186 1 4.1675976318493086e+00 8.3247658596031009e+00 7.8802494669419030e+00 1 -1 -1
+413 1 6.4945159878598346e+00 8.3677431053238660e+00 7.6981134933049891e+00 -2 -1 -2
+
+Velocities
+
+7 -1.5197672199477208e+00 -7.1031250708487148e-01 -4.0950627961412567e-01
+398 9.4644999179644840e-01 -8.0422358764146151e-01 -1.1023964746841350e+00
+173 4.8084218242595870e-03 -1.9383808513915850e+00 9.3439099328992314e-01
+462 -1.6390116924948674e+00 -1.2080683544699562e+00 7.6685759600965364e-01
+383 1.2945118204202577e+00 -1.8370931432093225e+00 1.1758972656982776e-01
+288 1.2113220352829337e+00 4.8786819623543370e-01 7.1439798712590263e-01
+303 3.6232856211831610e-01 -3.7927260062054236e-01 -3.0467891118546553e-01
+491 1.6817821327211264e+00 6.3153250223994289e-02 -5.4827183875981267e-01
+187 5.6164532616475686e-02 1.3250338697636479e+00 1.6291294556417517e+00
+163 1.3950518510134631e+00 -1.7090097072259662e+00 -6.0029200570353536e-01
+345 -8.4635427449528189e-01 1.3013642733988193e-01 -3.7582537476409439e-01
+447 -5.5306523194731427e-01 -1.4583063079290524e+00 4.8086237208854654e-01
+108 -3.1879632824306825e-01 -4.9387838912491844e-01 -3.1046215530949173e-01
+427 2.4456665882970424e-01 1.3737057789182878e+00 -1.7474218101951866e-01
+96 -2.8217287015277487e-01 3.9138367367476778e-01 -4.6114700958040086e-02
+494 3.0303709765292064e-01 -9.2861341821366045e-02 -3.6692009603190118e-01
+360 -4.2709052063514102e-01 -1.5167804301477015e+00 -2.6902074926466563e-01
+179 -1.2403873183879999e+00 1.1368400311662936e+00 5.6653449328883521e-01
+136 -1.5859168963178476e+00 1.0728794555657983e+00 5.0876417522892092e-01
+311 -1.4232560247874224e-01 -8.2802934475265932e-01 1.0289469027582252e+00
+242 -1.3753994053365912e+00 3.0823812109914095e-01 1.2519475129774249e-01
+451 1.0684545083916284e-01 -1.3647250068831975e+00 1.2292458701523420e-01
+118 6.5344809858683794e-01 -6.0529931820910909e-01 -6.6821572295625731e-01
+200 -9.1497459004230530e-01 -7.2783581485393822e-01 -8.5265666689286446e-01
+23 1.0121027757280427e+00 -5.8690115135784995e-02 -1.4283745590726701e-01
+30 -1.2406061152582815e+00 3.7490020292637088e-01 -1.4457935973251330e+00
+26 -9.9581994312158117e-01 -1.2441026541932486e-01 -3.6541440712112250e-01
+452 -9.8588039362152369e-01 1.0706468502992672e+00 1.6282162085617651e+00
+439 -1.2386761163593085e+00 4.3106305328993799e-01 5.0410115305724934e-01
+492 2.7094066326642935e-01 6.6318883302565457e-01 1.5999014287193769e-01
+287 -1.7254693201510321e-01 1.1189096817152451e-01 4.1489954379924110e-01
+87 -1.1416496451717975e+00 -7.8732701356097490e-01 -5.1553483379997289e-01
+191 1.2771897324335735e+00 -1.7496443253093807e+00 5.0507874089360993e-01
+316 9.1440987552186193e-01 5.4700363415289210e-01 9.8622452705158947e-01
+176 1.8575029150961730e+00 4.1139607157872565e-01 -7.4590595351664335e-01
+244 -4.6943526746297531e-01 -4.0598167955478592e-01 1.2148844546685273e+00
+463 -6.0334643189530614e-02 9.0710610807033173e-02 -1.1159129056326442e+00
+101 -4.6185710842519001e-01 -1.0745733134156132e+00 -8.4095960640252343e-01
+456 7.3559492687866046e-01 8.4585098605759870e-02 9.9419285892741016e-02
+90 -6.9201778873218364e-02 4.2798282427303436e-01 4.4566581747153294e-01
+457 -2.4132420839469965e-01 8.4836564798364844e-01 1.0955942184355154e+00
+344 6.5538410556645377e-01 1.0527852545027427e+00 6.8154568558028386e-02
+399 -8.3144103937947644e-01 1.9062875610231711e-01 -5.6419101062513155e-01
+472 7.2667243545881360e-02 4.9535613585667015e-01 8.3691870718219108e-01
+364 -2.9526170592863293e-01 -8.7862728527604539e-01 8.2689159018968139e-01
+333 1.7241045239522232e-01 1.0435495829586168e-01 -4.6109306717747689e-01
+223 3.7919285550573256e-01 -1.0672385659469772e+00 3.6984147519214111e-01
+277 -8.8028901289995187e-01 5.8075379122029680e-01 -5.0555689035944396e-01
+487 1.5229361655153211e+00 -4.3334638823410909e-01 -5.2025315373818937e-01
+202 1.1890248672755632e+00 -1.3103428543183313e+00 1.3877359082264291e+00
+139 -3.8593619396281981e-01 -4.1419081865118412e-01 4.8341157555249414e-01
+218 -1.2190042149414249e+00 8.6603581070857749e-01 5.3180226853912571e-01
+10 4.0346230429792890e-01 8.8022045895227152e-01 3.0611723210999553e-01
+64 1.0687583963535543e+00 -2.5590555538042992e-01 8.2852453062456832e-01
+40 -1.0543471069223134e+00 -7.9810717548135568e-01 1.4389073785681245e-01
+328 3.2979243925573831e-02 3.1076559402174242e-01 -1.6410904905031269e+00
+292 -6.3224783054646483e-01 -4.3704038413109603e-01 -2.2759444134328080e+00
+245 -4.7869651996764956e-01 1.4231085014444791e-01 -1.0109564296885869e+00
+148 -7.1011989912373741e-01 -6.8437374624235792e-01 -4.7084060341785217e-01
+435 -2.3710180935788738e-01 7.3150957873543554e-01 1.9569288343148944e-01
+301 -7.3626400316656140e-02 -1.2518892197053937e+00 6.4938262930099341e-02
+476 4.4522311443074564e-01 8.4413304640745324e-01 6.9180275295562699e-01
+310 -4.4579106235383259e-01 -1.4870237388818175e-01 -1.7519046471808755e+00
+137 1.0191269790481958e+00 -1.1871217736355175e+00 6.0687187776921336e-01
+434 -8.0977924479936902e-01 8.2445677967490594e-01 -7.2982619204505017e-01
+184 9.3329968718567657e-01 -7.8960961760367476e-01 -3.2387020805541877e-01
+5 -7.6591643318776403e-01 4.4567507028100856e-01 -5.1504861374485367e-01
+41 -6.0827814801341495e-01 1.0335818250945215e+00 -1.4262577781586459e+00
+410 5.1617643329833041e-01 -1.2505433325731774e-01 6.5548767709779598e-01
+418 7.4452116863445517e-01 5.5176595694257291e-01 4.8433486134467341e-01
+34 -1.0960797951083094e+00 -9.5559899011366922e-01 4.1467430632337771e-01
+67 -1.4762168791195980e+00 7.3902009632630203e-02 -9.8563314451155759e-01
+390 1.1040922756330935e+00 -1.0608530270906485e-01 -8.5480860400114822e-01
+125 1.1001683944330445e+00 2.5213959563273414e-01 -7.0907876564322847e-01
+105 9.3642884637919491e-02 3.2241122070134182e-01 3.4267503174724429e-01
+274 1.4805711826308454e+00 1.9591264275732104e-01 4.6208005550476272e-01
+314 3.3724005635036658e-01 -4.9228354871213487e-01 -5.9105855919176131e-01
+408 4.1789097671328662e-01 2.2054753884758851e-01 9.9409268338014201e-01
+142 1.3529751978153088e+00 8.8819247946851643e-01 -2.9699787225611667e-02
+25 -3.6747378202427655e-01 -1.6653943785151797e+00 2.1594786196965424e-01
+458 -7.3004682150577682e-01 -2.5761460079892318e-01 -7.5492049253862104e-01
+165 -2.6776300065898789e-01 -3.1428309412647404e-02 7.6464836175889062e-01
+421 -2.2112566002008222e-01 -8.0033416193571816e-01 7.4057822704117926e-01
+151 1.6510010513742961e+00 6.2880202489514836e-01 1.3109463507573997e+00
+162 8.6466591751463506e-01 1.2003096922919751e+00 1.6720745976940152e+00
+120 7.1544273833415639e-01 7.8605685992147967e-01 1.5756692842267452e+00
+448 9.4818399052933544e-01 2.0373333697562079e-01 1.1821449360296239e-01
+50 -7.4255028295716818e-01 3.2131856627302258e-01 7.0344624031236247e-01
+443 5.1965861871733798e-01 -1.0469002555669342e+00 4.3373559100868195e-02
+482 1.9661775056161916e+00 1.8911734358361203e+00 7.2949567046678088e-01
+407 1.8582263860937767e-01 -1.3051257268383373e-01 5.0987931207045945e-01
+100 -3.7775697318477114e-01 6.0919017017007682e-01 7.1809182324625254e-01
+99 -1.2684617496938173e+00 7.3671717137305515e-01 -8.1216571664258830e-01
+107 -2.8776644836089277e-01 5.0874100972210130e-02 -4.8277793530428115e-01
+480 -1.8051331679926971e-01 8.6563042329278228e-01 -5.8352716569019136e-01
+115 -1.0589943552297505e+00 -5.2550981762149152e-01 -1.7244688387853080e-01
+278 1.5768829958501078e+00 1.4428098110470169e+00 8.8216722790329116e-01
+206 -1.3653407453215169e+00 5.7341428096126357e-01 2.2629859809279146e-01
+4 4.2054817038258185e-01 -2.9390858515089308e-01 -1.7875237254421117e+00
+214 -1.0426854223671351e+00 -9.9898728143962490e-01 -1.9850479848636837e-01
+65 4.1792586017239108e-01 8.0186431881714215e-01 -2.0107060384389590e+00
+490 6.7355251805377303e-01 -2.0698786089395388e-01 -3.1564646838967253e-01
+230 -1.3855973935201399e-01 1.6134941051796734e+00 2.3723214232474421e-01
+234 1.9442134326447673e+00 -1.3805634063343805e-01 -9.8210270774821284e-01
+92 1.8063255781217027e-01 9.3716955232457000e-01 -1.9187798219061411e-01
+103 3.9559605419305116e-01 -8.7523159162324915e-01 -8.6993815784441930e-01
+129 1.2253789920819727e+00 2.8171983713246046e-01 -1.2180024131333584e+00
+483 9.6349992375661320e-01 -1.6348662648160852e+00 8.3712630375667085e-01
+110 8.0468512955471783e-01 -1.2623668571621947e+00 1.4617362498240638e+00
+126 -2.5741602043303030e-01 -2.5870568275485217e-01 -1.0282926023825125e+00
+276 7.0448925295125497e-01 -1.4486777483518087e-01 4.0589927727544634e-01
+294 4.9277474123671205e-01 1.0788251906341033e+00 1.0720476037391911e+00
+417 1.2134126298835348e+00 6.9341902173441838e-01 -6.5915650495314448e-01
+102 -9.4530772564542864e-01 3.5633646308084904e-01 2.1645216652503570e-02
+228 -3.1477177829390662e-01 -7.3015136189998553e-02 -1.3550661516185267e+00
+2 -1.5167590438599203e+00 3.2942906513575698e-01 -3.9430403043809092e-01
+222 -3.4853423639550507e-01 4.3402827204271471e-01 -8.9055344775375656e-01
+332 7.6671266929186610e-01 -4.2789189576138797e-01 2.2589706911206284e-01
+275 1.0091776783462640e+00 -2.3875723103898955e-01 9.0625273137588058e-01
+243 -2.4008164703199580e-01 2.4602229249214927e-01 -9.2542387373274426e-01
+394 -1.5496683613302659e-01 -1.4731622236607285e+00 4.3712483588631723e-02
+325 -9.9716991278750156e-01 9.0079916710328489e-01 6.1656781470423368e-02
+468 1.1367478364774437e+00 3.7268550263787792e-01 -1.9334998928228118e+00
+113 -6.1637943298580211e-01 -1.4259739417714792e-01 8.5880390205968704e-01
+47 1.4522907038923785e+00 -3.8138661441744320e-01 8.8902241399148096e-01
+474 -6.5326939744671464e-01 -2.9584216080544323e-01 -1.6703076347144621e-01
+343 -2.0865957540293453e-01 4.2320136734663144e-01 -5.2417372636564219e-01
+257 -1.6598412047016537e+00 3.9614126419385248e-01 6.2190463287430520e-01
+422 9.8621254947118736e-02 -4.5653326655140536e-01 -1.6959752095102018e-01
+317 8.1554926779727022e-01 -9.3952932101149422e-01 4.1213701483558629e-01
+322 9.7692194337623842e-02 7.2221211185210599e-03 1.4983319825497168e+00
+453 1.0112117753398511e+00 -1.6043121320019077e-01 -1.5109389287188097e+00
+82 -1.8245270960375951e-01 -1.7227940931260219e-01 9.4914460423273084e-01
+402 -1.3974163926710530e+00 1.0238276956317611e+00 -2.7116721725643461e-01
+201 3.8154565319070316e-01 6.1162789639306392e-01 5.3616806651343329e-01
+159 -5.5623509138788352e-01 -7.3506001159846490e-01 2.9669217335717424e-01
+239 -1.1703971851987414e+00 -2.5624748584715568e-01 1.9987978519039629e-01
+340 -1.1651635328367398e+00 1.4923285973929103e+00 -5.0638764373602618e-01
+246 4.6916366638116123e-02 3.7471219463751682e-01 -8.5595777185358435e-01
+268 3.9776335718715750e-01 -4.1326407010466332e-01 6.4132644428253849e-01
+238 -1.0102989663900468e+00 2.5145560602393102e-01 1.6537809300909093e-01
+183 9.6649446422314420e-02 -3.1320299584600791e-01 8.9787784698607254e-01
+393 -8.1709715097020719e-01 6.5173585509788723e-01 1.3340272254613436e-01
+177 9.2862946550951486e-01 -7.4734865012317631e-01 2.6370187436425798e-02
+97 4.0324698775550633e-01 -6.5060630846020162e-01 4.3381661619662121e-01
+16 9.2225588537666714e-01 5.0761227093429218e-01 2.8224038915514177e-01
+15 7.4631799122735776e-01 -5.1735298399747642e-01 1.0267699639319139e+00
+467 2.9233724948217432e-01 -1.7768548355681282e-01 9.7602334864839035e-01
+404 -1.5591392530118275e+00 6.9331308976828065e-01 -1.9156738913640683e-02
+190 -2.3293475783192552e-01 1.9804103372721136e+00 1.4400561196029799e-01
+429 -3.1240905856412571e-01 5.3399975662794008e-01 -8.3672459106253427e-01
+356 1.1813499076138427e-03 4.5712322524667531e-01 7.2569550209608535e-02
+459 -8.2536401218028255e-01 -5.1587534016495484e-01 -3.2429633054579987e-01
+140 1.4169132709847332e+00 -1.6948863471126016e-01 -5.4590188862129563e-01
+121 -8.0623019357608017e-02 4.3956025735145637e-01 -4.1230464603429395e-01
+76 1.2149513180720075e+00 3.0664283974167705e-01 -6.2206166881984115e-01
+24 -9.0953792569391767e-01 2.7208504534230199e-01 -5.7968255809907288e-01
+215 -3.6635366137979941e-01 4.9083544993599626e-01 5.4540399677247620e-01
+406 -2.8711421371139068e-01 9.8028410382686626e-01 -1.5688211456737056e+00
+495 -2.6064405757207798e-01 1.0014422650657693e+00 1.2195244915445839e-01
+256 5.1502433217852350e-01 2.1618101498582329e+00 -5.5671498574760148e-01
+348 1.5303758850601952e+00 1.3538700733088898e+00 -1.0404602147830582e+00
+18 8.7092721144608365e-01 8.8296303804909981e-01 4.7076145062566271e-01
+259 1.2579194534410862e+00 1.1275351076124240e+00 1.3255628670657002e-01
+261 -1.9457509303164215e+00 -5.7013962206489510e-01 6.5787967104563000e-02
+433 9.5143226323949670e-01 -1.9787864106930605e-01 2.1852616176467912e+00
+119 5.2387728877999162e-01 -7.5008900728698324e-01 1.0156553043596699e+00
+181 3.6547537534957092e-01 4.4856602347738611e-01 1.9502787843157898e-02
+392 1.8883706922750969e+00 1.0836617564317459e+00 6.8587165903540392e-01
+436 -7.5639783907079583e-01 -6.1811736982444421e-01 -2.2826839230281426e-01
+210 -4.5965909404278121e-01 -3.3285948977797364e-01 -2.8815683118847368e-01
+409 -8.9630394160430427e-01 1.2498156320615575e-01 -5.9486610361558201e-01
+437 -1.3458244785393264e-01 1.1611540390359226e-01 3.6707571520967769e-01
+114 -1.1780626363780731e+00 1.1424256338126497e+00 8.1898562672334474e-01
+431 3.9295632541408759e-01 -5.0466339464883803e-01 7.4701651109545830e-01
+296 -2.9562979314792992e-01 9.3794387166245374e-01 4.3821394647736267e-01
+477 -1.0332993738194893e+00 5.9732959339716152e-01 -1.6990304505644747e+00
+199 -1.0627414724922077e+00 3.6527741326574337e-01 -5.0076911469807539e-01
+31 1.1836258822972918e+00 -1.1805527709262840e+00 -1.0148440310730218e-01
+149 -9.7152762187442065e-01 -8.2689424175864179e-01 -9.8308725322137946e-01
+216 -2.6635793686817422e-01 -1.1249304826477751e+00 5.2675272099922754e-01
+425 -2.8287490221194594e-01 4.1086346332975782e-01 -5.6774387340250076e-01
+224 -6.3636066584070272e-01 3.8927738472590379e-01 -1.2043299828812322e+00
+352 -6.1634031766701480e-01 1.9344774717790295e-01 7.8374630494614250e-01
+289 -9.8440193523267117e-01 1.4471012647779624e+00 4.3922631035860088e-01
+455 -1.1942412759322272e+00 1.1300600257912083e+00 -4.7958784865825908e-02
+298 -8.4955866984285322e-01 1.8131854984076790e-01 -1.9010434504555735e+00
+42 1.1523609145851870e+00 1.3198909771653919e+00 -1.1943568765046571e+00
+80 2.0928302112510155e+00 -1.9281727715628849e-01 3.5879525261694012e-01
+127 1.0275071129063864e+00 -7.8560029105042173e-01 8.4680312545585235e-01
+232 3.2836195573764693e-01 1.5921754951459595e+00 -1.4889040346123832e+00
+368 -1.0143580013676627e+00 -6.3286772474382025e-01 5.9003271203631313e-01
+486 3.0052735920053437e-02 -7.5917073216074904e-01 -4.0707609210832352e-01
+209 8.3084043466989521e-01 -1.2819860426930157e-01 9.8450910112172252e-01
+152 1.4018503697498572e+00 -1.3364969077798206e+00 5.1589841760235022e-01
+353 5.3822763779176630e-01 -4.8322987719532096e-01 3.5634065895494110e-01
+361 6.8264929410893807e-02 -1.3682995245096907e+00 -4.9273501999153174e-01
+391 6.7065636846264776e-01 -2.7904582963633018e-01 -6.5303896626046076e-01
+464 7.4567031841775322e-01 1.8046977002555891e+00 -3.9159771733015164e-01
+497 -1.3856699033357778e+00 -9.3599081481125368e-01 1.8204759707532270e-01
+498 1.0568629143757213e+00 -6.8137997517450560e-02 -3.7037143049187105e-01
+264 -7.7894167957263005e-01 2.7383401106980504e-01 7.5098074183721675e-01
+265 1.9614235016832940e-01 -4.2322665725562197e-01 -1.6236881765469688e-01
+350 -6.3256317352651559e-01 -5.9660876051471468e-02 5.2317095757618959e-01
+144 -2.0800773378554721e-01 -1.5156540919629388e-01 -1.8814379829548375e-01
+17 8.2103760580927843e-01 -1.1053143150308307e+00 1.1843881143235824e-01
+441 9.9191589104231870e-01 -1.9479579195707514e-01 1.8130903870970541e+00
+266 -9.4957121369000017e-02 8.6997989048119007e-02 -1.5903074475142429e-01
+375 8.1973242957273296e-01 1.0075441532082390e+00 6.6277118207442265e-01
+351 -1.0694047025049973e+00 1.2185457298237026e-01 7.2731979237547140e-01
+254 -1.4178061472233576e+00 1.0044925984667965e+00 5.9999609355847139e-01
+29 4.7548776272319854e-01 1.5001558567759110e-01 7.4645778473548241e-01
+198 5.7218364285113010e-01 2.9196622753924861e-01 -1.2231490460337893e+00
+197 -1.4204377171882598e-01 2.1322055429678972e+00 3.5360172437812271e-01
+192 -4.3230000337254881e-01 8.1753260459826260e-01 -1.3961239427992276e+00
+220 9.2338173032465304e-02 7.2444278115958327e-01 4.3240465996750244e-02
+432 3.3837153168712325e-01 1.9757543957217949e-01 -3.2138599665961831e-01
+20 7.2972771576502227e-01 -7.1923547937920451e-01 3.8462300862783037e-02
+63 7.3459759072459618e-01 -2.1393390343408181e-01 1.4606085672716668e-01
+446 9.5489174587921177e-01 -2.2284752482645742e-01 -7.3147917056258960e-01
+172 -2.3233356067616504e+00 5.9696639792861073e-01 1.0311292046090428e+00
+405 -1.1779705391325133e+00 2.7846640825986735e-01 9.6440591359561623e-01
+323 1.2758508066094825e+00 1.1576865878083721e-02 1.3415986450967126e+00
+428 4.8021126796596170e-01 4.8003440312842205e-01 -4.1979524804611819e-02
+280 1.1723141734791545e+00 8.4290272518839332e-02 1.1240738308596074e+00
+54 8.2234236592018339e-02 6.9377897211693754e-02 4.6415665343815465e-01
+397 -2.4516753222359439e-01 -3.8647076314136586e-01 -3.1938187998240442e-01
+279 4.7475740365710301e-01 4.2033019401821775e-02 1.0791025172609856e+00
+134 4.6986570792330234e-01 7.5759398359663410e-01 1.3748348906115773e+00
+236 -1.4338534270949166e+00 6.5242359063738475e-01 -4.6879001422000049e-01
+493 -9.5795155221589912e-01 -9.3131490174346532e-01 5.2848598027407112e-01
+346 9.4017276609564226e-01 -1.4423054432512394e+00 -3.4883627652283825e-02
+295 -1.0211326704350645e+00 -1.5688283678435537e+00 2.2737881805456114e-01
+164 4.2933435120637192e-01 1.2908919636558716e+00 -1.8559649323965055e-01
+354 3.1232180266110654e-01 -3.3884174823706786e-01 2.0012570054806617e-01
+449 -4.1956545717722271e-01 8.5033185910749320e-01 -1.6923286514167504e+00
+185 -1.9847768794898153e-02 1.6093024328157031e+00 -3.4278049008935008e-01
+229 3.3354615877618454e-01 -2.1967770508754222e-01 -6.8288738508757751e-01
+135 -3.8598562184690355e-01 2.3459455181853697e-01 1.4591795655915993e-01
+211 -1.1858788850370010e+00 1.3960363241794649e+00 1.1390210005059993e+00
+81 1.3604301094625607e-02 1.2711357052370638e+00 1.3840699383718491e+00
+37 3.5025606454523878e-01 -1.0316562604143946e+00 2.4944393037279400e+00
+355 -4.2713554752038259e-02 1.6441819966987897e-01 -8.2550044716409909e-01
+141 8.1569462514471880e-01 -6.8558748209733145e-01 7.0575287639668483e-01
+122 3.6274405429300222e-01 -4.1985688325391463e-01 2.5223429679090231e-01
+327 5.2689568302324030e-01 -7.1530570005039407e-01 8.1591540939993761e-03
+386 5.2540176252260506e-01 -8.0891002493580211e-01 -2.1080798818740659e-01
+59 3.8008676468324715e-01 1.2302042558959829e+00 -1.2893140123247515e+00
+57 -1.2756375464672689e+00 7.7885421584605408e-01 9.3218921154320622e-01
+489 -4.8772350194378145e-02 1.1774319824237183e+00 -1.3413963962772402e+00
+326 -8.2073133946066312e-01 -1.2685275916429064e+00 -1.0253535003668284e+00
+359 -1.8991447869255085e+00 -1.2154594584519982e+00 -1.7832247863578221e-01
+349 6.4100123949028309e-02 -3.2723074072385872e-01 -1.5565659217165477e+00
+247 4.5734576435732799e-01 4.9511064545143885e-01 -1.5945768728559153e+00
+365 -5.7917383718454207e-01 -2.3494837692508375e+00 -1.5004840205862926e+00
+382 -1.0813649655944142e+00 4.1831946020927230e-01 2.9306887264755938e-01
+130 -5.7488571729114812e-01 -9.6638702807104448e-01 -1.0278487271582966e+00
+471 1.1085411185196428e+00 -5.2404573748396321e-01 9.9734211160337760e-01
+21 -2.3901903994255250e-01 9.3578484419611557e-02 -7.3719660959071831e-01
+499 -4.9906801820206043e-02 4.0499214176262377e-01 -2.9371307883695258e-01
+253 -7.8928864460108283e-01 3.3838305995790624e-01 7.1275694444227405e-02
+302 1.5254343639867396e-01 -2.7881674166920268e-02 9.3627888140226589e-01
+324 -1.1159492478288651e+00 7.0632546920242867e-01 2.5394407536700253e-01
+154 4.8736880051344117e-01 -1.3177717741071062e+00 -7.9660620460177323e-01
+284 1.7716036485389025e-01 -3.6539348914929748e-01 -4.6941135291661629e-01
+161 -2.7479275242316997e-01 7.6982156884799513e-01 -5.7088165723862649e-01
+112 2.7023059482372042e-01 2.0487127767495705e-01 -4.9350658197380731e-01
+39 -1.6840134362063963e-01 -2.0596641915022665e-01 1.2622422574022685e+00
+269 -1.4448376822412348e-01 4.2047192520573229e-01 -1.5751711248887881e+00
+470 -9.0198936075875380e-01 -1.0035762705605118e-01 1.1469701649179411e+00
+84 -3.7165102348228596e-01 1.5718083932531206e+00 1.8715295090631173e-01
+381 6.6740577340943863e-01 -1.9432536761932276e-01 -1.2757754863002870e+00
+416 7.2118119300841677e-01 8.6813375765446710e-01 -8.2350198387929519e-01
+182 3.3694648494832763e-01 -6.5830649973844163e-02 7.0076361524788766e-01
+347 -2.4949061953800500e-01 -1.1432286442922379e+00 -4.5757988288631407e-01
+380 -4.7143472726668367e-01 -2.9180051952103941e-01 6.0581680882348110e-01
+19 7.3007216948068054e-02 9.4947888518620716e-01 3.4627576178512598e-01
+430 -2.2320112834958328e-01 -3.8559102989803251e-01 1.0427665572858709e+00
+212 2.0170777345036919e-01 -9.9332505765821755e-01 1.0550787824425254e+00
+479 -7.7910620356048310e-01 -3.4759422268929079e-02 -5.8756871125828891e-01
+217 -7.0999315500035393e-04 -1.4874035841198494e-01 -1.1389898361525996e+00
+147 3.3953282916732841e-01 1.0503362153417393e+00 8.8554338624429496e-01
+13 1.4952012032076418e+00 -1.3492024227588162e-01 8.6834761707295621e-02
+170 -7.5230462228731629e-02 1.9875829136564169e-01 3.8512887113920530e-01
+204 -3.1459152727076800e-01 -7.2066080044499392e-01 -7.5277710665622377e-01
+306 8.0529112138128323e-01 5.8633344712571456e-01 -1.8256367575393391e+00
+414 -1.7678147014996259e+00 -7.1706970804552028e-01 6.0614094017430153e-01
+79 1.6303375369704216e+00 -9.6446702691566244e-01 1.7014099837302679e+00
+116 -1.7231587507489068e+00 -2.2374070614774770e+00 1.0442058418221465e+00
+231 -9.2546402309298947e-01 3.2157716287919585e-01 -6.2797312032694141e-01
+171 1.4243635122212095e+00 -3.4034411127116748e-01 2.1060050313379719e+00
+273 1.0777993506600285e+00 -1.7160435918146213e+00 1.0194335249558832e+00
+260 1.4544392536719453e+00 9.3765863648494707e-01 1.9642995061847701e+00
+43 -1.1326443879098980e+00 1.4846909715454837e-01 1.6874301063514463e+00
+208 -1.1234489793683757e+00 -1.0408587064067447e-01 -4.8386417041968832e-01
+226 6.0458834484750512e-01 -4.5998597100022531e-01 -4.1420588246526679e-01
+370 -2.5264443636161833e-02 2.2888129565845829e-02 -7.5123334633027034e-02
+85 8.9124431006375637e-01 1.2150234901900336e+00 -1.3794080039677503e+00
+213 6.8750004752856542e-01 1.7643443473184617e-01 -7.6039731517372366e-01
+299 1.1166507011383160e+00 -6.9342398354435130e-03 4.0031150060326554e-01
+249 1.6676259701674601e+00 -1.2916011404415280e+00 8.3081388251616040e-02
+258 -3.3338543070706111e-01 -5.9677208925291603e-01 1.5474505568531705e-01
+388 3.3338877926510010e-02 3.6534261356710357e-01 -9.3163626937108668e-01
+91 -1.0436539497197659e+00 -3.6206368573074071e-01 -3.9406149981573330e-01
+415 1.8022253921012549e-01 -2.5361248238828202e+00 7.8761564433010600e-01
+27 1.6681493439613866e+00 -6.3832374162264238e-01 5.5825898158707521e-01
+465 -3.3883292906124979e-01 -1.9199928938047977e+00 -1.8916637827626086e-02
+72 -7.0256791239588934e-01 -1.3631374858756479e-01 1.1539466869530803e+00
+58 -9.7431680335045678e-02 -1.0635351953490649e-01 -4.5168665276313896e-01
+88 5.2937320884954953e-01 -1.1749454649084838e+00 1.2744645330784524e-01
+6 -1.1593774469007843e+00 5.1413980407296589e-01 1.2944389470552302e+00
+75 5.7374690805841033e-01 7.8433587872715163e-02 -3.9368611836917572e-01
+22 -9.6996881290452630e-01 1.2965746977075312e+00 -1.5394575986278033e-01
+128 -6.4070276272082638e-01 2.2708380115440285e-01 -1.6142126045669323e+00
+193 5.7443781554989515e-01 -1.2458447329459237e+00 -9.3796775534017041e-01
+389 6.7276417854677451e-01 -2.9571653479618748e-01 -2.4423872850138906e-01
+241 -1.4102001294916395e-02 -9.9059498112109945e-01 -8.4827835518272399e-01
+14 1.0515231184651945e+00 4.5617222115757200e-01 -5.2911246318135385e-01
+307 5.0028759749137040e-03 -5.0699437336696396e-01 -2.5790615969461578e-01
+401 3.9207473498457289e-02 -9.2058641117610335e-02 -1.3689269955592848e-01
+248 1.5980225324156891e-01 -4.1600725109692227e-01 4.1253240562945254e-01
+335 5.6526042010051392e-01 -1.0563866873458263e+00 2.2963431232103770e-01
+281 4.7335967523683825e-01 -9.8764893385752195e-01 1.3275577965292436e+00
+33 9.5713349527393091e-02 -6.7578402785937589e-02 1.2308478723868863e+00
+221 -1.2080117435798665e+00 -2.2142477880517361e-01 -7.4060132361069042e-01
+251 5.1352069062594478e-02 -4.7173153686079722e-01 1.3645828375870801e+00
+138 -9.8931419992230929e-01 -5.6403083699472267e-01 4.8437720371021081e-01
+73 -8.7238230525929461e-01 2.2619668651197614e-01 -3.2904255863286136e-01
+237 1.5966476021876643e+00 -1.0734521639966819e+00 -5.7008225474518526e-01
+77 -6.5731386120913948e-01 -1.3672574981662913e-01 3.4397972232069346e-01
+339 6.8164227902965080e-01 -8.7654828372118265e-01 -9.2289632523091991e-01
+69 -1.0710495228647499e+00 -2.2798359247984892e-01 -1.4017813863798740e+00
+150 1.4728554201464288e+00 1.6873571351133543e-01 -4.9418642333439719e-02
+219 3.5347912349947574e-01 1.9808724875609746e-01 -5.0625815831733678e-01
+9 6.3426493841071574e-01 2.5658018877632760e-01 4.7953937763858034e-01
+3 -1.2144659411870629e+00 8.8297859772359033e-01 -2.6732917639533704e-01
+93 3.3822789154655764e-01 8.1143107440094409e-01 -2.7263548606070248e-01
+68 1.6351351046383883e-01 5.8445604848527588e-01 1.2062643207337951e+00
+35 1.6550671586466750e-02 1.6790869037398232e-01 2.5947900830558479e-02
+174 1.4403688502275100e+00 1.4476094634256031e+00 -1.0038411182890872e+00
+369 -7.9303772394565375e-01 -1.1303432398125628e+00 -1.5457440975201582e+00
+469 -5.5239153419630294e-01 -4.7075634498045837e-01 5.0539291079888571e-01
+270 5.0351525849727374e-01 1.3716919044917866e+00 2.8157430421583601e-01
+196 -2.2101193763774998e-01 9.4257812877032954e-01 -1.2881024224381614e+00
+235 -3.7410168233289759e-01 1.9712488864630789e-01 4.3505985658493368e-01
+104 1.0770774810232915e+00 -7.7420116923795979e-01 6.4885799952777634e-01
+363 6.7513909705285891e-01 2.0186757605257277e-01 6.2260156794788402e-01
+440 -2.7563401138205812e-01 4.0196589304635166e-01 6.8696509786942431e-02
+271 3.5486339796858996e-01 -1.0482634846478769e+00 1.0693870635001128e+00
+385 2.0653637543135839e-01 1.6251902525248125e+00 -1.0240800582063148e+00
+367 1.2090890730479817e+00 7.7304993739281713e-01 -9.1196492155951403e-01
+8 1.5650406952874099e-01 1.4761853226272020e-01 7.1667065222651671e-01
+445 -8.1712119890107360e-01 5.0169175756271378e-01 6.3236875667481124e-02
+95 7.1388432342661989e-01 -2.6058252368101411e-02 -1.0159428151424379e+00
+227 -2.1897046900910261e-01 2.4492515107534649e-01 2.2679621024062299e-01
+45 5.0120245022687604e-01 -1.1380921868404339e+00 7.8232214243213327e-01
+62 5.7973338245234629e-01 -7.9117819757367913e-01 -5.4327640325878901e-01
+309 5.3715909018265540e-01 -1.7844975395461696e-01 -1.5521878040430610e+00
+70 -1.3649193486754410e-03 -1.5242800010205164e+00 1.6225001176581824e+00
+285 -1.5639729724408871e+00 1.4626366106935738e+00 -1.2286649953661322e+00
+387 -1.3226131489884976e+00 -1.0695489941103238e+00 -1.4911056977700106e+00
+484 -1.5087954881419843e+00 -2.3407968100714568e-01 8.9471716161806181e-01
+98 3.0624993702155973e-01 -1.5934008419723134e-01 -1.0700290072035785e+00
+282 -1.1678279208732874e+00 8.8816716100523774e-01 -3.7594379985472404e-01
+203 -1.2339287833422024e+00 -1.0851718286318821e+00 -3.5581216293468138e-02
+189 -2.5647410293058537e-01 -3.6970804452537526e-01 -2.7009745024499721e-01
+267 -2.3687566231483211e-02 5.4572876490146272e-01 1.2664988400691950e+00
+155 1.7966821085178111e-01 -1.4254017034472058e-01 -1.0614594824352175e+00
+313 -4.0833555690731410e-02 -4.8193546027639250e-01 -4.9602679807507655e-02
+250 4.3395781720710452e-01 9.1543705256636054e-01 -6.1780594432179192e-01
+49 -5.8496932778818223e-01 2.0845066341835794e-01 -4.5660091846264866e-01
+111 -4.5577164923909808e-01 6.7687347516731056e-01 1.6901306317755600e+00
+124 2.3017035971304120e-01 1.2116381200880159e-02 -1.0715278739263430e+00
+488 1.1047027896127255e+00 -1.2726762145955062e+00 7.7913777050672761e-01
+48 -4.0669146183799126e-01 -1.0181831306551499e+00 6.3005463620771884e-01
+123 -1.2387342693749222e-01 -4.5460712390743935e-01 -5.6241811130968811e-01
+423 1.5051226831159246e+00 -1.0959205280256621e+00 3.5512567969767117e-01
+329 1.5695087401093555e-01 -4.0810545929224507e-01 1.5430948869435961e+00
+334 -1.0908322951892016e+00 -5.4577269465148259e-01 -6.7521769754195848e-01
+146 4.4361993166914260e-01 4.1962079818012665e-01 -3.9049661615798353e-01
+500 -3.0319085884539942e-01 7.6039240701443311e-01 2.0836797720615632e+00
+46 -3.9431430053902639e-01 3.7259615958614579e-02 -1.6080049457594121e+00
+374 9.2250275913940816e-01 -5.4107808677407720e-01 6.5542912197165987e-02
+52 7.3216124676172201e-01 9.4572980131871298e-01 -1.2696251833719743e+00
+466 1.5044833940669236e+00 -1.8483350803538756e+00 9.5057050040256286e-01
+272 -4.0011978947412463e-01 2.0429884556820119e-01 5.4991808303431688e-01
+331 5.7814259377466726e-01 1.7044411336854475e+00 -7.2986308719737536e-01
+12 6.8240059668147224e-01 8.7756006768431916e-01 -1.7546792723746987e+00
+178 -1.1529369201979585e+00 -3.9576843637993819e-01 4.1654001054549866e-01
+376 -1.3678465511836193e+00 -1.5404423718061540e+00 -8.9187926122069050e-01
+117 -1.3804354656475164e+00 -7.9075263667417661e-01 -3.6776627245391275e-01
+419 1.9706999335859632e-01 -1.1815264434314889e+00 1.6458873301525292e-01
+379 2.6784097653047051e-01 3.8806722276897926e-01 4.0603233211791162e-01
+53 1.0521966172519035e+00 5.1164934479578683e-01 9.8577725800730365e-01
+28 1.0129347774638686e+00 1.1058165761432928e+00 -3.9034581257989903e-01
+300 -1.4876024382598509e+00 -6.2467982655378396e-01 1.3535210035045362e+00
+83 9.7907832581926901e-01 1.4855763621624507e+00 4.1007013403839504e-01
+304 -1.8963384707907098e+00 5.2731582964002444e-01 1.0962854464287259e+00
+51 8.5928138726119452e-01 -1.6162178311396377e-01 3.2746315986045632e-01
+180 -3.4136546641226267e-01 1.2771151944969144e+00 -3.9578085701206223e-01
+60 -1.7295103304379962e+00 -9.9492157200179920e-01 1.2224650147307308e+00
+255 5.0740556300071860e-01 -6.6892801585963790e-01 -4.4506165343889975e-01
+336 3.8821455708543523e-01 7.4853146108739210e-01 1.0393955338521590e+00
+74 -8.5185557539809709e-01 5.0099005012093123e-01 6.1187012870886726e-01
+11 -4.8598509694234203e-02 -9.7638648911006976e-01 3.6444261502684167e-01
+106 -8.3362253193144206e-02 -9.2846276284357510e-01 -2.0625632391587234e-01
+342 -1.1850581603543699e+00 -4.6587013939418070e-01 -2.5534356631266192e-01
+366 1.8044603030107276e-01 1.2857436498573065e+00 -4.9002876775305132e-01
+153 5.9355527141792742e-02 8.5309993046714749e-01 1.2601655126485285e+00
+384 -3.3199740732000493e-02 -1.1586262754841152e+00 2.7022536481319620e-01
+444 1.4366137360637837e-01 7.4777392856310121e-01 -1.3874143079892032e-01
+169 -2.2106828869911426e-02 4.3869453276997300e-01 -8.6050040149769946e-01
+312 -9.5175409360636343e-01 -7.9143766981298624e-01 -8.3109524929616485e-01
+131 -1.2661930440833327e-01 4.9560114120296378e-01 1.2481373975929652e-01
+290 4.7988720115251880e-03 4.3307801179133432e-01 7.2537453656012302e-02
+240 -1.4417217340859769e+00 1.4357683004122099e+00 1.4379544254488796e-01
+403 9.8034468727423985e-01 -2.0429774540476020e-01 -1.4454160823908524e-01
+438 1.1944593880989134e+00 2.3483551676455661e+00 1.9984542691081373e+00
+420 1.3486549938776343e-01 -1.0783938772052266e+00 2.4823645088293936e-01
+175 1.6188020186225376e+00 -1.2786027961604105e+00 1.5779664705358573e-01
+305 7.6304434006850930e-01 -1.0609104653684724e+00 -1.3992599279449225e+00
+341 -5.0986549858189378e-01 9.7345649143358179e-02 -5.4583627317992767e-01
+291 1.4937985447982780e+00 1.1345276617898410e-01 1.4735185581275405e+00
+372 2.7438262099508215e-01 2.3642499432513167e-01 2.0103126185436837e-01
+293 -2.0812456222423235e+00 1.2585017395884830e+00 -1.9872753884412903e-01
+461 -1.7366155414350635e+00 6.7563297134023181e-01 -3.0241597324899727e-01
+263 -9.9912758612453856e-01 -1.4606803538340738e+00 -4.8347668759554979e-01
+473 1.5452042518079840e+00 1.3194414166855677e+00 -1.4200655529990689e+00
+166 -1.4143951087621034e-01 -1.1527398628151918e-01 -2.0324403247706951e-01
+132 1.6628716981291343e+00 -2.6751309910688231e-02 -1.1291183234894089e+00
+32 -2.3270952903472897e-01 2.1018074582476372e+00 1.1203402946623044e+00
+475 -1.2604994295453433e+00 -9.5339439518226154e-01 3.3662881048301792e-02
+283 1.2749816341052314e+00 4.7341406662300863e-02 8.5345288522412766e-01
+205 9.6825352209774937e-02 -1.0389613158303228e+00 -5.4505460161674724e-01
+158 8.8743698904591217e-01 1.0689060843560156e+00 6.1104406355038210e-01
+1 5.8273461256327774e-01 4.6658417699512433e-01 7.3964607391058224e-01
+485 4.4170048227791719e-01 -2.8581555293721217e-01 -1.0141724005249320e+00
+378 6.6654429040499424e-02 -1.8224880226392848e+00 1.2128685020610477e+00
+71 -9.1996605782626056e-02 -2.2291664206826806e-01 -1.2402752353304469e+00
+320 -9.5597854323421563e-02 -8.6852053155856812e-01 3.5066557017928385e-01
+481 7.9532304801009490e-01 -9.4180364698072971e-01 1.2495155405965916e-02
+318 -4.1450336396091869e-01 3.4120074122917260e-01 7.3720013366972947e-01
+496 -6.2393347120986375e-01 1.0494054390993381e+00 -1.0590359239552802e+00
+319 -9.5320962685676391e-01 9.2021659866276262e-01 5.5763202369068265e-01
+338 9.5180850251473562e-02 7.4581412234581979e-02 1.6562396473566801e+00
+358 -1.2039319191565336e+00 9.9635011716776112e-01 3.7776599320072252e-01
+225 -2.2878377680357529e-02 5.9393376646738183e-01 -1.7855655501046659e+00
+55 -1.2658412487783444e+00 8.3827650617277571e-01 3.7229167676011687e-01
+362 -1.1190412417889151e+00 -4.1093870066191052e-01 -5.5943417408569930e-02
+36 -1.1944064170110538e+00 5.2834231151049005e-01 -2.5661926163189262e-02
+133 -3.4046853819783990e-01 8.7037698795660379e-01 2.6480748394474063e-01
+412 -8.2433971673189854e-01 -2.4364204997081215e-01 3.2182546651866395e-01
+89 1.2836300411162869e+00 -1.0534639266932924e+00 1.7273414295686953e+00
+38 -5.4018176537409834e-01 1.1189891674084445e+00 -5.4949279328730405e-01
+337 2.0680174457636453e+00 6.8756290663232345e-01 -5.3885707707990127e-01
+233 -1.1580426595635346e-01 1.5352867360847791e+00 1.8920809100075830e-02
+442 -5.2273629379374242e-01 -4.7573534425197805e-01 3.3349171376798270e-01
+143 -6.4456717536444386e-01 -8.2221860127843815e-01 -3.1619033462829965e-01
+194 4.6000888686458724e-01 2.4868203858229221e-01 -1.0844456124091726e+00
+308 7.0412401782623568e-01 1.4705120558853640e-01 -1.6984333842169479e+00
+286 -7.4825969157579242e-01 1.7277684761952672e+00 -4.8658995664543186e-01
+61 -1.4087152482721139e+00 -1.9786675343505181e-01 -8.3049863139435698e-01
+426 -5.9317510584759559e-01 -3.1176204329742879e-01 9.7102624547089234e-01
+450 2.9660411649227686e-01 -1.0106432265344207e+00 -8.0608626908093450e-01
+188 -4.5857896465809017e-01 8.3781782180488956e-02 1.6679261825519407e+00
+94 -2.5105865768429703e-01 -1.0277678550758340e+00 2.0110948522283595e-01
+252 -2.6250057461969262e-01 -8.3779492646801812e-01 -1.2443616941843418e+00
+321 -1.3849227082743394e+00 -7.9961814140752485e-01 -5.2758719721869440e-01
+156 9.7248559724229044e-01 1.4856613149426096e-01 -1.0762319296143268e+00
+167 4.8758255885458768e-01 3.7938735168685944e-01 -8.2720329905212353e-01
+330 4.8147102008722625e-01 -2.9945144880586022e-01 -7.3225876236181275e-01
+195 1.6483881807209144e+00 -1.9598861592898009e-01 -6.2914422221089158e-02
+78 2.3726384124165131e-01 2.7514362192855252e-01 1.7571172900747967e-01
+157 -2.1865090243933563e+00 3.7632441115528098e-01 7.7333806476534650e-01
+373 9.8884980347773899e-01 3.4121263358683873e-01 -3.3626353210018556e-01
+56 -5.1874451216553419e-01 5.0889073330236878e-01 -7.4130187216438104e-01
+315 1.5052362221906064e+00 -6.9840962964633502e-01 -1.5488907100876634e-01
+478 1.1082301092586995e+00 2.9770971389874379e-01 -5.4580262091212584e-01
+109 9.4396405787734605e-01 4.9318782477478235e-01 1.2284569230572757e+00
+357 -1.2371693160029509e+00 4.1376978558912775e-01 3.3529522630256764e-01
+454 -4.4342104600414733e-01 -3.5633346291025164e-01 -2.1131375082322554e-01
+86 -2.6274074185542590e-01 -2.7912975828127135e-01 -3.4564526880694488e-02
+262 -2.0457328043953218e-01 -1.0549970010682790e+00 -8.7569945910877856e-02
+160 1.2508565962389259e-01 -1.1049367732134203e+00 -4.1414673901133031e-01
+168 -5.4833844765552298e-01 9.1122623641564449e-01 -8.1466354261018392e-01
+145 1.9424847461468013e+00 2.4717259671440775e-01 -8.1993106446423478e-02
+400 -7.3927429735196926e-01 9.6278828542903949e-01 9.8355595468926471e-01
+395 -1.7616642510587566e+00 -4.3839255400156163e-01 -4.3538389834393521e-01
+297 -8.1651466743971568e-01 1.9480772762641770e-02 -2.5090574319554337e-01
+207 9.9594630394692474e-01 -5.5333584932915503e-01 -8.1672262386449701e-02
+460 3.8863838706865678e-01 -1.9332061145329088e-01 7.5599739666291188e-01
+44 4.9322334356812642e-01 5.4097595609480897e-01 1.2096517945019982e+00
+377 -2.3588544095100208e-02 4.2843373407529362e-01 -8.1372583759538197e-01
+411 -7.0187880935114388e-01 -8.8403807864596529e-01 3.6139298123719504e-01
+396 -6.1324863673452068e-01 -5.8579142070009704e-01 -1.6980414017487730e+00
+424 -1.4405372913769463e+00 -1.2161267905238037e-01 -7.1275775203655271e-03
+66 -2.2455608800065441e-01 -5.5194985983545303e-02 -2.9031971555487707e-01
+371 2.8693892522161785e-01 -1.3942754264578394e-01 4.1638095135795372e-01
+186 -2.5542578648985415e-01 -2.7435394726950046e-01 1.6931522427410683e+00
+413 5.6007135922818607e-01 1.2189064363493711e-01 -7.5271223366515216e-01
diff --git a/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial b/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial
new file mode 100644
index 0000000000000000000000000000000000000000..613f46935cba579aa52fba7f6cfaed40a932ef3a
--- /dev/null
+++ b/examples/USER/uef/nvt_uniaxial/in.nvt_uniaxial
@@ -0,0 +1,28 @@
+# Uniaxial NVT deformation of a WCA fluid (example input for fix nvt/uef).
+# Lennard-Jones reduced units and the "atomic" atom style.
+units		lj
+atom_style	atomic
+# Pair potential: lj/cut with "pair_modify shift yes", i.e. cut and shifted at the cutoff.
+# NOTE(review): 2^(1/6) is about 1.122462 but the cutoff below is 1.122562 - confirm it is intended (changing it alters the reference logs).
+pair_style	lj/cut 1.122562
+read_data       data.wca
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+# data.wca is read as an orthogonal box; convert it to triclinic before fix nvt/uef is defined.
+change_box      all triclinic
+# Thermostat at 0.722 (damping 0.5) with erate -0.5 -0.5 (x and y rates; the z rate presumably follows from traceless flow - see the fix nvt/uef docs).
+fix		1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5
+# Zero the linear momentum in x, y and z every 100 steps.
+fix             2 all momentum 100 linear 1 1 1
+# Optional trajectory output, disabled by default: a plain atom dump or a cfg/uef dump.
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+# Print components 1-3 of compute "1_press" (presumably the pressure compute created by fix 1 - confirm).
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
diff --git a/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1 b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..2c13448192214878b0d36c4568820ada2ed610dc
--- /dev/null
+++ b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.1
@@ -0,0 +1,281 @@
+LAMMPS (22 Sep 2017)
+# uniaxial NVT deformation of WCA fluid
+
+units		lj
+atom_style	atomic
+
+
+pair_style	lj/cut 1.122562
+read_data       data.wca
+  orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  500 atoms
+  reading velocities ...
+  500 velocities
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+
+change_box      all triclinic
+  triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0)
+
+fix		1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5
+
+fix             2 all momentum 100 linear 1 1 1
+
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.62256
+  ghost atom cutoff = 1.62256
+  binsize = 0.811281, bins = 11 11 11
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/newton/tri
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.742 | 2.742 | 2.742 Mbytes
+Step c_1_press[1] c_1_press[2] c_1_press[3] 
+       0    6.3937851    7.0436438    6.4461087 
+      50    10.369902    10.999889    6.5437384 
+     100    12.411384    13.145871    7.8892802 
+     150     12.88182    12.114068    7.5155182 
+     200    10.375571    10.979773    6.5624056 
+     250    10.158901    10.251273    5.4525068 
+     300    10.011314    9.8371382    5.4031393 
+     350    9.4890008    9.7992108     5.054963 
+     400    9.1715116    9.2647886    4.9600208 
+     450    9.8495961    9.3353483     4.986786 
+     500    9.5903809     8.576173    4.8252116 
+     550    8.8057509    9.7152078    4.9089022 
+     600    8.9743682    10.031277    4.8924072 
+     650    10.233852    9.3059527    5.1977058 
+     700    9.2361906    9.6439971    5.0923763 
+     750    10.449751    9.5911465    5.2548714 
+     800    9.8366857    9.9873885    4.9456956 
+     850    10.266351    9.9620134    5.0602713 
+     900    9.6325813    9.3910946    5.1466624 
+     950    9.1911919    9.1519006    4.8275345 
+    1000    9.2871712    9.8508501    5.2674099 
+    1050    9.7052035    10.291793     5.160359 
+    1100    9.9425552    10.361827    5.5520484 
+    1150    10.377563     10.26588    5.5997436 
+    1200    9.6368269    10.354413    5.4137873 
+    1250    9.2071055    9.1706334    4.9800411 
+    1300    9.7140516    8.7257777    4.8536943 
+    1350    10.145983    9.8247797    4.8921813 
+    1400    9.1998975    9.6935675    5.2269871 
+    1450    9.4838136    8.9035726    4.6867121 
+    1500     9.066389     9.811658    4.8875316 
+    1550    10.007083    9.8946118     5.204609 
+    1600     9.722137    9.9291588     5.326712 
+    1650    9.7424614    9.6995966    5.3577819 
+    1700    9.7185135    9.5922931     5.385856 
+    1750    9.6368481    8.1414594    5.0046079 
+    1800    9.8065782    9.6798036      4.92473 
+    1850    9.0463737    10.024205    5.6495695 
+    1900    9.0774484    10.527582    5.0363426 
+    1950    9.1691461    10.049703    4.8517514 
+    2000    9.7619602    10.049532    5.2088038 
+    2050    9.9203348    9.8056395    5.7699852 
+    2100    9.1455665    9.5864018    5.1410542 
+    2150    8.8501135    9.7917307     5.141824 
+    2200    9.1433685    9.8168548    5.1742441 
+    2250    9.3248968    10.454532    5.5456356 
+    2300    10.210505     9.361499    5.0935226 
+    2350    9.5041186     9.598986    5.2451005 
+    2400     8.968896    9.2811921    4.7799868 
+    2450    9.8201159    10.133892    4.9981817 
+    2500    9.4421084    9.7875766    5.4124809 
+    2550    9.6410643    9.9148309    5.0603288 
+    2600    9.3810676    9.5346849    5.3233836 
+    2650    9.2340337    8.7233538    5.0361302 
+    2700     9.405597    9.7848488    4.7537042 
+    2750    9.9447412    9.2366502    4.9736459 
+    2800    10.189443     9.452684     5.624362 
+    2850    9.6680124    9.0941543    5.0101469 
+    2900    8.4350889    9.1595018    4.7706866 
+    2950    9.0056117    9.3498593    4.7655963 
+    3000    9.4795416    9.7400747    5.4705868 
+    3050    9.6605264    9.4558374    5.1240166 
+    3100     9.681451    9.7966554    5.3174458 
+    3150    9.7887528    9.7371194    4.9349103 
+    3200    9.9720054    9.5592538    5.2306417 
+    3250    10.185023    9.9436037    5.4127312 
+    3300    9.9670676    9.6069307     5.026261 
+    3350    9.0090411    9.5975249    4.8881019 
+    3400    9.6535707     9.748683    4.9933873 
+    3450    9.7774977    9.7570511    4.8512619 
+    3500    9.7250053     10.36386    5.4057249 
+    3550     9.567788     9.773413    5.3936671 
+    3600    9.2094148    9.0211247    5.2328675 
+    3650    9.3512098      9.73407     4.576624 
+    3700     9.159074    9.2611591    5.2996448 
+    3750    9.4772798    8.9792211    5.1007079 
+    3800    9.3898357    9.1150609    5.2126508 
+    3850    9.2466312    9.1460651     4.867472 
+    3900    9.9316993    9.3061137    5.1219265 
+    3950    9.2550913     9.780254     5.371885 
+    4000    10.005356    9.7328972    5.2535791 
+    4050    9.5228242    9.5134113    5.4181393 
+    4100     9.576808    10.082425    5.1272491 
+    4150     10.20265    9.6667058     5.529118 
+    4200    9.9443671     10.09427    5.6163734 
+    4250    9.4956875    9.9462818    5.0820085 
+    4300    10.350144    10.003702    5.1621505 
+    4350    10.119486    9.8691507    5.4913457 
+    4400    9.4991604    10.516185    5.2512264 
+    4450    9.0812854    9.2835603    5.1695714 
+    4500    9.7538478    8.5298834    4.5954607 
+    4550    9.8920394    9.3581299     5.202587 
+    4600    9.2694921    9.3338291     5.201958 
+    4650    10.925818    9.5062049    4.8694531 
+    4700    11.020014    10.130224     5.582409 
+    4750    9.5005142    9.3571561     5.456739 
+    4800    9.6670147    9.6628702    5.0451252 
+    4850     9.134147    9.1308788    4.7950594 
+    4900    9.7466206    8.6171443    4.8716275 
+    4950    9.0397505    9.1996036    5.2010502 
+    5000    9.6898652    9.8914655    5.3959279 
+    5050     10.06771    9.7523891    5.3690408 
+    5100    10.000963    9.6279379    5.4077384 
+    5150    9.8686159    10.179702    5.0686824 
+    5200    9.8866247     9.725152    5.4350049 
+    5250    9.2068346    10.214424    5.3187713 
+    5300     9.713992    9.8069045     5.496359 
+    5350     9.423852     9.364085    5.2144606 
+    5400    9.4350241    9.5584633    5.0339001 
+    5450    10.555124    10.784922    5.1938072 
+    5500    9.4147344     10.33187    5.4360602 
+    5550    9.8514653    9.6575827    5.4959779 
+    5600    9.3138107    9.6592624     4.941387 
+    5650    9.1224809    8.7112257    5.0435936 
+    5700    8.8289158    10.749686    4.8916132 
+    5750    9.7200279    10.030606    5.2033161 
+    5800    9.8439873    9.6289015    5.5131934 
+    5850    9.6257294    9.4128988    4.9196038 
+    5900    9.7490214    9.5776313    5.0301815 
+    5950    9.1430855    10.108944    5.1406243 
+    6000    9.3358644    9.5633737    4.9787073 
+    6050    9.4432774    8.9464304    5.1466052 
+    6100    8.8878373    9.5048946    4.9190238 
+    6150    9.6451898    9.2419823    5.0159841 
+    6200    9.5042173    8.9414307    5.2634247 
+    6250    9.0896505    9.7230651    5.3340322 
+    6300    8.8100599    8.8781352    5.4117914 
+    6350    9.3104601    9.0233294    5.3136432 
+    6400     9.368101    9.6387362    4.7833216 
+    6450    10.334343    9.8384149    5.3606204 
+    6500    9.8824036    10.022627    6.0857086 
+    6550    9.7034443    10.026765      5.17604 
+    6600    9.3757845     9.899268    5.2301359 
+    6650    10.540821      10.4343    5.5287065 
+    6700    9.6317649    9.8923579    5.6045768 
+    6750    9.5982772     10.07897    5.1221451 
+    6800    10.239883    10.189967    5.3167447 
+    6850    10.017271    9.7680902     5.229621 
+    6900    9.6200416    10.129301    5.1998759 
+    6950    9.0361417     8.923798    5.1652612 
+    7000    9.3153521     9.063054    4.6860773 
+    7050    8.6434091    9.0363436    4.7811975 
+    7100    9.4955395    9.3830541     5.022538 
+    7150    9.3392402    9.1847119    5.1544622 
+    7200    9.4676321    9.8370036    4.8854442 
+    7250    9.5115882    10.350324    4.9780525 
+    7300    9.6025583    9.6247917     5.473794 
+    7350    9.8919524    10.049446    4.9816931 
+    7400    9.6814319    9.9410894     5.265078 
+    7450    9.4130955    10.191436    5.2531256 
+    7500    9.8114668    8.8461635    5.0562894 
+    7550    10.321567    9.4730124    5.2043655 
+    7600    9.5059024    9.8330367    5.0749721 
+    7650    10.067084    10.606423    5.5598818 
+    7700    10.896159    10.084281    5.5159718 
+    7750     9.754306    10.162301    5.2475876 
+    7800    9.7278145     9.801009    5.0685504 
+    7850    9.8639905    10.323104    5.2458864 
+    7900    9.7246799    9.1377357    5.1841319 
+    7950    10.381792    9.6977533    5.0977386 
+    8000    9.7265224    9.2999829    4.7925571 
+    8050    9.5203288    9.3144956    4.7539211 
+    8100    9.6900973    9.6614063     5.304712 
+    8150     9.713677    9.9154149    4.8178575 
+    8200    9.4733597    9.8948632     5.036112 
+    8250    9.7783036    9.9554334    5.3355682 
+    8300    9.4034783    9.9156801    5.5539279 
+    8350    9.2984025    9.2013949    5.0753991 
+    8400    9.9299078    9.6166801     5.115776 
+    8450     9.526737    9.3312125    4.7800587 
+    8500     9.581956    10.065906    4.9756092 
+    8550    9.2767953     9.326006    5.3024978 
+    8600     9.799968    8.4660845    5.0199109 
+    8650    9.8985354    10.127852    4.9098064 
+    8700    8.7952691    10.521133    5.6840528 
+    8750    9.8299997    9.5588553    5.3085734 
+    8800    9.0811776    9.5704532    5.1684993 
+    8850    9.8303571    9.7618932    5.1251259 
+    8900    9.9238794    9.9654863    5.2999683 
+    8950    10.851304    9.9682289    5.4133763 
+    9000    9.5523794    9.1890766    5.1469144 
+    9050    9.7461948    8.9611236    4.9490826 
+    9100    10.138917    9.6757567    5.0473544 
+    9150    9.4869835    9.4786575    5.0142464 
+    9200    10.263518    10.079135    5.1493398 
+    9250    9.8691684    9.0908275    5.3221203 
+    9300    9.8586707    9.4177643    5.1525265 
+    9350    9.3375816    9.9167208    5.4846207 
+    9400    9.5603903    9.4813199    4.6237495 
+    9450     10.30892    9.5217736    5.6163214 
+    9500     9.327949    9.9831649    4.8923915 
+    9550    9.8421656    9.3202702    5.3352046 
+    9600    8.8543704    9.4556702    4.6430041 
+    9650    10.103399    9.2161072    4.8658062 
+    9700     9.507811    9.9647378     5.227369 
+    9750    9.4988096    8.9942893    5.2491418 
+    9800    9.8007958     9.234452    5.1740203 
+    9850    9.6029685    10.076042     5.023107 
+    9900    9.4035691     10.13782    5.2775777 
+    9950    9.6517135    10.355994    5.3035779 
+   10000    9.9157616    9.7741952    5.5269431 
+Loop time of 3.47119 on 1 procs for 10000 steps with 500 atoms
+
+Performance: 1244529.812 tau/day, 2880.856 timesteps/s
+99.5% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.53077    | 0.53077    | 0.53077    |   0.0 | 15.29
+Neigh   | 2.219      | 2.219      | 2.219      |   0.0 | 63.93
+Comm    | 0.21866    | 0.21866    | 0.21866    |   0.0 |  6.30
+Output  | 0.0027909  | 0.0027909  | 0.0027909  |   0.0 |  0.08
+Modify  | 0.41956    | 0.41956    | 0.41956    |   0.0 | 12.09
+Other   |            | 0.08042    |            |       |  2.32
+
+Nlocal:    500 ave 500 max 500 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    908 ave 908 max 908 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    3489 ave 3489 max 3489 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 3489
+Ave neighs/atom = 6.978
+Neighbor list builds = 4567
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:03
diff --git a/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4 b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..276eb2f443017d2b9fb1c4e52f8d981896fcb6c4
--- /dev/null
+++ b/examples/USER/uef/nvt_uniaxial/log.22Sep17.npt_uniaxial.g++.4
@@ -0,0 +1,281 @@
+LAMMPS (22 Sep 2017)
+# uniaxial NVT deformation of WCA fluid
+
+units		lj
+atom_style	atomic
+
+
+pair_style	lj/cut 1.122562
+read_data       data.wca
+  orthogonal box = (0 0 0) to (8.39798 8.39798 8.39798)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  500 atoms
+  reading velocities ...
+  500 velocities
+pair_coeff	1 1 1.0 1.0
+pair_modify     shift yes
+
+neighbor	0.5 bin
+neigh_modify	delay 0
+
+change_box      all triclinic
+  triclinic box = (0 0 0) to (8.39798 8.39798 8.39798) with tilt (0 0 0)
+
+fix		1 all nvt/uef temp 0.722 0.722 0.5 erate -0.5 -0.5
+
+fix             2 all momentum 100 linear 1 1 1
+
+#dump		1 all atom 25 dump.lammpstrj
+
+#dump		2 all cfg/uef 25 dump.*.cfg mass type xs ys zs
+
+thermo_style    custom step c_1_press[1] c_1_press[2] c_1_press[3]
+
+thermo		50
+run		10000
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.62256
+  ghost atom cutoff = 1.62256
+  binsize = 0.811281, bins = 11 11 11
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/newton/tri
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.71 | 2.71 | 2.71 Mbytes
+Step c_1_press[1] c_1_press[2] c_1_press[3] 
+       0    6.3937851    7.0436438    6.4461087 
+      50    10.369902    10.999889    6.5437384 
+     100    12.411384    13.145871    7.8892802 
+     150     12.88182    12.114068    7.5155182 
+     200    10.375571    10.979773    6.5624056 
+     250    10.158901    10.251273    5.4525068 
+     300    10.011314    9.8371382    5.4031393 
+     350    9.4890008    9.7992108     5.054963 
+     400    9.1715116    9.2647886    4.9600208 
+     450    9.8495961    9.3353483     4.986786 
+     500    9.5903809     8.576173    4.8252116 
+     550    8.8057509    9.7152078    4.9089022 
+     600    8.9743682    10.031277    4.8924072 
+     650    10.233852    9.3059527    5.1977058 
+     700    9.2361906    9.6439971    5.0923762 
+     750    10.449751    9.5911465    5.2548714 
+     800    9.8366858    9.9873885    4.9456956 
+     850    10.266351    9.9620134    5.0602713 
+     900    9.6325811    9.3910947    5.1466627 
+     950    9.1911906    9.1518996    4.8275336 
+    1000    9.2871726    9.8508561    5.2674079 
+    1050    9.7052019    10.291782    5.1603609 
+    1100    9.9425827    10.361848    5.5520588 
+    1150     10.37757    10.265891    5.5997246 
+    1200    9.6367069    10.354451    5.4138749 
+    1250    9.2075006    9.1705136    4.9806417 
+    1300    9.7126123    8.7247301    4.8528533 
+    1350    10.146034    9.8319205    4.8958266 
+    1400      9.19638    9.7052094    5.2332913 
+    1450    9.4667073    8.9014702    4.7073387 
+    1500    9.0949401    9.8381729    4.8904182 
+    1550    10.068959    9.8096692    5.1613658 
+    1600    10.131443    9.7086921    5.0913546 
+    1650    9.9557572    9.9183989    5.2635245 
+    1700    9.3820418    8.6135867    4.9395498 
+    1750    8.9486365    9.3433293    5.2674469 
+    1800    10.352322    10.354432    5.3335153 
+    1850    8.9629784    9.6172224    5.0824828 
+    1900    8.5429652     9.749815    5.1577895 
+    1950    8.9828002    8.9631646    5.0948426 
+    2000    9.4171989    9.7122117    5.1386251 
+    2050     9.767299    9.6146113    4.8965319 
+    2100    10.049399     10.02243    5.3856622 
+    2150    9.2639644     10.09669    5.3020322 
+    2200    9.1916162    10.841127    5.0271109 
+    2250    9.9859708     8.519568      5.24568 
+    2300    9.9462279    9.5850706    5.0632906 
+    2350    9.8043011    9.7605126    5.3148323 
+    2400     9.089818    9.6474522    5.0012486 
+    2450     9.655874    9.3255636    4.8291262 
+    2500    9.0259445    9.3074827    5.1593031 
+    2550    9.1610478    9.7356492    5.0741161 
+    2600    9.3153881    9.3936949    5.4830363 
+    2650    9.6212696     9.598252    4.7172875 
+    2700     9.318906     9.665656    5.1917527 
+    2750    9.6613838    9.7106796    5.1843146 
+    2800    10.231844    9.9407641    4.6940455 
+    2850    10.008093    9.2781076    5.2624957 
+    2900    10.464168     9.808666    5.1457978 
+    2950    9.7080601    9.6972304     5.406456 
+    3000    9.6851209    10.050737    5.1198394 
+    3050    8.9093581     9.213428     5.233108 
+    3100    8.8098921    9.6607476    4.9625331 
+    3150    8.6608386    9.7503441    5.0737533 
+    3200    9.7004403    9.6748778    5.0734462 
+    3250    10.077054    10.318711    5.3233841 
+    3300     10.63887    9.4901739     5.516542 
+    3350    10.232909    9.3407073    5.4989967 
+    3400    9.8854134    9.9542625    5.1982468 
+    3450    9.9388203    9.3394716    4.8135833 
+    3500    9.2514026    9.5857527    5.4605449 
+    3550    8.3940282    9.0549836    4.6951548 
+    3600    8.7696659    9.6262816    4.6302087 
+    3650    9.0695173    8.8520895    5.0814723 
+    3700    9.4700744    9.2777557    4.6825004 
+    3750    9.0221844    9.3407486    5.3984156 
+    3800    9.1478369    9.1420043    5.3024474 
+    3850    9.2652818     9.700053    5.3355364 
+    3900    10.248456    9.2480211    5.2375956 
+    3950    9.8259922    9.6130415    5.5978761 
+    4000     9.404877    9.7931698    5.3767927 
+    4050    10.131713    9.7047295    5.2964594 
+    4100    9.8128638    9.4253237    5.5308166 
+    4150     10.25183    9.1333595    5.1957555 
+    4200    10.449736     9.295762    4.7863033 
+    4250    9.7304858    9.4482515     5.356439 
+    4300    9.2773777    9.4110855    4.9879246 
+    4350    8.9325082    9.3429549    5.0410132 
+    4400    9.2603855    9.7905381    4.7436126 
+    4450    8.7600443    9.9160722     5.196316 
+    4500    9.0824514    10.036035     4.873051 
+    4550    9.3884333    9.6644343    5.1154951 
+    4600    9.8181676    9.6385118    5.3639835 
+    4650    9.1574799    9.5386974    4.7487094 
+    4700    10.275911    9.5383553    5.1084297 
+    4750    9.2476854    9.3353591    5.4773008 
+    4800    9.3422407    9.1931821    4.9210291 
+    4850    9.9033126    9.6443642    5.1334553 
+    4900    9.8469467    9.6836455    5.5101146 
+    4950    10.211229     9.760253    5.0151668 
+    5000    9.3256273    10.109873    5.4129479 
+    5050    9.9704879    9.8504809    5.2191163 
+    5100    9.4820718     9.711504    4.9633504 
+    5150    9.2952171    9.6955742    5.3214246 
+    5200    9.8400791    9.4763906    4.9149518 
+    5250    10.080112    9.5634741    5.1701348 
+    5300    9.4221014    9.8149742    5.3605931 
+    5350    8.8228402    10.121343    5.3192212 
+    5400    9.7318719    10.508763    5.0044083 
+    5450    8.7429847    9.6583774     5.033313 
+    5500    9.9243256    10.239521    5.2956506 
+    5550    9.6088558    9.8555986     5.359257 
+    5600    10.904352     10.72033    5.7874034 
+    5650    9.4774477    9.7246962     5.596949 
+    5700    9.8527139    9.2188245    4.6518586 
+    5750    9.7932375    9.2763721    4.6663307 
+    5800    9.5630073    9.1704583    5.4784197 
+    5850    10.159996    9.5206168    5.0012706 
+    5900    9.1667978    9.6891715    5.1959301 
+    5950    9.2194131    9.2170699    5.1653264 
+    6000    9.5917124    9.2038051    5.1004966 
+    6050    9.4141124    9.9166471    5.0535712 
+    6100    10.231166    9.7746591    5.2399634 
+    6150    9.6054192      9.34446    5.0711646 
+    6200    9.9279645    8.9546561    5.4698039 
+    6250    9.1581437      8.95439    4.9336111 
+    6300    8.9257232    9.0665473     5.188718 
+    6350    9.7685695    9.0822789    4.8454457 
+    6400    9.5140226    9.5073414    5.3420644 
+    6450    9.5379198    9.3316002    5.1940338 
+    6500    10.216584    9.8982859    5.2790157 
+    6550    10.221286    10.913585    5.4521496 
+    6600    10.925166    10.088055    5.2079758 
+    6650    10.012501    9.3870455    5.1740433 
+    6700    10.176139    9.4869588    5.3783369 
+    6750    9.2819415    8.1431975    4.8256476 
+    6800     9.790723    9.7932943    5.3602927 
+    6850    9.1327199    9.8354267    5.1356642 
+    6900     9.240909    9.3854618    5.1767268 
+    6950    10.652806    9.4355671    5.1314851 
+    7000    9.1676917    10.055501    5.2824651 
+    7050    9.6486488    9.1063089    4.9596976 
+    7100    9.3733678    9.5875095    5.0882285 
+    7150     9.282243     9.354934    4.6752644 
+    7200    8.7370777      10.1445    5.0217245 
+    7250     9.563419     9.929442    5.1720916 
+    7300    9.9128346    9.2426863    5.3595023 
+    7350    9.9319949    9.9479477    5.4844629 
+    7400    9.4671739    9.2380048    5.3274217 
+    7450    9.7531193    9.4075641    4.9426738 
+    7500    10.331422    9.4704554    5.3036636 
+    7550    8.7724434     9.663179    4.9492563 
+    7600    10.222748    9.1674852    4.8135992 
+    7650    9.5288583    8.5866929    5.0342955 
+    7700    8.8832847    8.8035037    4.6184473 
+    7750    9.4847076    9.7969656    5.0035855 
+    7800    9.2782486    9.1283641    5.3151299 
+    7850     8.962989    8.7729872    5.0374817 
+    7900    9.6336556    9.4975426    4.9832641 
+    7950    10.125279    10.044247    5.2740873 
+    8000    9.9705097    9.6925988    5.2344188 
+    8050    9.7836318    9.5794261    4.9174534 
+    8100     10.79561     10.04809    5.1604639 
+    8150    9.7261726    10.372878    5.3083843 
+    8200    10.338569     10.08189    5.6555172 
+    8250    9.7833315    9.9834209    5.3790753 
+    8300    9.5857944    9.6052323    5.0689289 
+    8350    9.6706688    9.6189242    4.7725678 
+    8400    10.570423    9.5786093    5.1894242 
+    8450    9.6514599    10.168359    5.0733592 
+    8500    10.273682    9.9179284    5.4014563 
+    8550    9.3120287    10.146837    4.9895115 
+    8600     9.511943     9.644112     5.462624 
+    8650    10.380674    9.1117114     5.156727 
+    8700    10.068596    8.7687113    5.1440814 
+    8750    9.2484971    9.2477678    4.9318794 
+    8800    9.7298469    8.9480303    5.1151321 
+    8850    9.7299502    10.415138    4.7902908 
+    8900    10.966912    10.732962    5.4793574 
+    8950    10.328384    9.9501313    5.6238396 
+    9000    9.7385041    9.8319224    5.1926497 
+    9050    9.7971055    9.5740203    5.1111302 
+    9100    9.7789727    9.9281901    5.1786549 
+    9150    9.9306964    9.3360599    4.9524547 
+    9200    9.8798841    10.240752    5.1691344 
+    9250    10.185445    9.4934917    4.9188964 
+    9300    8.9184663    8.9349408    4.8079511 
+    9350    9.6552187    9.9846949    4.9619969 
+    9400    10.304306    9.2298208    5.2822855 
+    9450    9.8379613    10.041703    5.4186514 
+    9500    10.221443    9.5342818    4.8929802 
+    9550    9.9723047    10.072856    5.4169676 
+    9600    9.3923879    9.2984387    5.4452785 
+    9650    8.9072589    9.7482374    4.7835208 
+    9700    9.8370121    10.205922    5.0385145 
+    9750    9.4274542    9.4653248    5.6340681 
+    9800    9.7668106    9.3265705    5.3154126 
+    9850    10.422549    10.362922     5.361592 
+    9900    9.6264407    9.9790162    5.6381052 
+    9950     10.35018    9.8853593    5.2639184 
+   10000    9.6190853    9.7903758    5.1583115 
+Loop time of 1.32471 on 4 procs for 10000 steps with 500 atoms
+
+Performance: 3261084.476 tau/day, 7548.807 timesteps/s
+98.4% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.1301     | 0.13489    | 0.13886    |   0.9 | 10.18
+Neigh   | 0.5654     | 0.575      | 0.5883     |   1.3 | 43.41
+Comm    | 0.35135    | 0.37147    | 0.38856    |   2.4 | 28.04
+Output  | 0.0028336  | 0.0032777  | 0.004292   |   1.0 |  0.25
+Modify  | 0.16328    | 0.16711    | 0.17231    |   0.8 | 12.61
+Other   |            | 0.07297    |            |       |  5.51
+
+Nlocal:    125 ave 128 max 122 min
+Histogram: 1 1 0 0 0 0 0 0 1 1
+Nghost:    477 ave 489 max 469 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+Neighs:    861 ave 877 max 848 min
+Histogram: 1 0 1 0 0 1 0 0 0 1
+
+Total # of neighbors = 3444
+Ave neighs/atom = 6.888
+Neighbor list builds = 4560
+Dangerous builds = 0
+
+Please see the log.cite file for references relevant to this simulation
+
+Total wall time: 0:00:01
diff --git a/examples/coreshell/in.coreshell.wolf b/examples/coreshell/in.coreshell.wolf
new file mode 100644
index 0000000000000000000000000000000000000000..337057d0c829b9b033503e37c7d1cd8b5cdf7aa6
--- /dev/null
+++ b/examples/coreshell/in.coreshell.wolf
@@ -0,0 +1,71 @@
+# Test system for the core-shell model, compared to Mitchell and Fincham.
+# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation.
+
+# ------------------------ INITIALIZATION ----------------------------
+# Metal units, three dimensions, periodic in x, y and z, atom style "full".
+units 		metal
+dimension	3
+boundary	p	p	p
+atom_style	full
+
+# ----------------------- ATOM DEFINITION ----------------------------
+# Define the per-atom integer CSID and fill it from the "CS-Info" section of the data file.
+fix csinfo all property/atom i_CSID
+read_data data.coreshell fix csinfo NULL CS-Info
+# Atom types 1-2 form the "cores" group, atom types 3-4 the "shells" group.
+group cores type 1 2
+group shells type 3 4
+# Velocities are communicated to ghost atoms (presumably required by compute temp/cs below - confirm).
+neighbor 2.0 bin
+comm_modify vel yes
+
+# ------------------------ FORCE FIELDS ------------------------------
+# pair_style arguments (per the LAMMPS born/coul/wolf docs): damping parameter alpha, then two global cutoffs.
+pair_style   born/coul/wolf/cs 0.1 20.0 20.0    # pair_coeff columns below: A, rho, sigma=0, C, D
+pair_coeff   * *      0.0 1.000   0.00  0.00   0.00
+pair_coeff   3 3    487.0 0.23768 0.00  1.05   0.50 #Na-Na
+pair_coeff   3 4 145134.0 0.23768 0.00  6.99   8.70 #Na-Cl
+pair_coeff   4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
+# Harmonic bonds for bond types 1 and 2; the second coefficient (0.0) is presumably the rest length of the core-shell spring.
+bond_style harmonic
+bond_coeff 1 63.014 0.0
+bond_coeff 2 25.724 0.0
+
+# ------------------------ Equilibration Run -------------------------------
+# 500 steps of fix nve with a Berendsen thermostat; thermo output every 50 steps.
+reset_timestep 0
+
+thermo 50
+thermo_style custom step etotal pe ke temp press &
+             epair evdwl ecoul elong ebond fnorm fmax vol
+# CSequ: temp/cs temperature compute over the cores and shells groups, reused by all temperature-related commands below.
+compute CSequ all temp/cs cores shells 
+
+# output via chunk method (alternative, disabled)
+
+#compute prop all property/atom i_CSID
+#compute cs_chunk all chunk/atom c_prop
+#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
+#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector 
+# Use CSequ for the temperature reported in the thermo output.
+thermo_modify temp CSequ
+
+# velocity bias option: create velocities at 1427 (seed 134) and rescale them, both using CSequ
+
+velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
+velocity all scale 1427 temp CSequ
+# Berendsen thermostat at 1427 (damping 0.4) on top of fix nve, acting on the CSequ temperature.
+fix thermoberendsen all temp/berendsen 1427 1427 0.4
+fix nve all nve
+fix_modify thermoberendsen temp CSequ
+
+# 2 fs timestep (0.002 in metal time units, i.e. picoseconds)
+
+timestep 0.002
+run 500
+# Remove the thermostat so that the following run uses fix nve only.
+unfix thermoberendsen
+
+# ------------------------ Dynamic Run -------------------------------
+# 1000 further steps with the same timestep.
+run 1000
diff --git a/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1 b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1
new file mode 100644
index 0000000000000000000000000000000000000000..35c30913fcb46b65294dfcc6f1ae4955420dcfa7
--- /dev/null
+++ b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.1
@@ -0,0 +1,190 @@
+LAMMPS (22 Sep 2017)
+# Testsystem for core-shell model compared to Mitchel and Finchham
+# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation.
+
+# ------------------------ INITIALIZATION ----------------------------
+
+units 		metal
+dimension	3
+boundary	p	p	p
+atom_style	full
+
+# ----------------------- ATOM DEFINITION ----------------------------
+
+fix csinfo all property/atom i_CSID
+read_data data.coreshell fix csinfo NULL CS-Info
+  orthogonal box = (0 0 0) to (24.096 24.096 24.096)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  432 atoms
+  scanning bonds ...
+  1 = max bonds/atom
+  reading bonds ...
+  216 bonds
+  1 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+group cores type 1 2
+216 atoms in group cores
+group shells type 3 4
+216 atoms in group shells
+
+neighbor 2.0 bin
+comm_modify vel yes
+
+# ------------------------ FORCE FIELDS ------------------------------
+
+pair_style   born/coul/wolf/cs 0.1 20.0 20.0    # A, rho, sigma=0, C, D
+pair_coeff   * *      0.0 1.000   0.00  0.00   0.00
+pair_coeff   3 3    487.0 0.23768 0.00  1.05   0.50 #Na-Na
+pair_coeff   3 4 145134.0 0.23768 0.00  6.99   8.70 #Na-Cl
+pair_coeff   4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
+
+bond_style harmonic
+bond_coeff 1 63.014 0.0
+bond_coeff 2 25.724 0.0
+
+# ------------------------ Equilibration Run -------------------------------
+
+reset_timestep 0
+
+thermo 50
+thermo_style custom step etotal pe ke temp press              epair evdwl ecoul elong ebond fnorm fmax vol
+
+compute CSequ all temp/cs cores shells
+
+# output via chunk method
+
+#compute prop all property/atom i_CSID
+#compute cs_chunk all chunk/atom c_prop
+#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
+#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
+
+thermo_modify temp CSequ
+
+# velocity bias option
+
+velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 22
+  ghost atom cutoff = 22
+  binsize = 11, bins = 3 3 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair born/coul/wolf/cs, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+velocity all scale 1427 temp CSequ
+
+fix thermoberendsen all temp/berendsen 1427 1427 0.4
+fix nve all nve
+fix_modify thermoberendsen temp CSequ
+
+# 2 fmsec timestep
+
+timestep 0.002
+run 500
+Per MPI rank memory allocation (min/avg/max) = 8.59 | 8.59 | 8.59 Mbytes
+Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume 
+       0   -637.41039   -677.06805    39.657659         1427   -21302.622   -677.06805    1.6320365   -678.70009            0            0 1.2935454e-14 3.3306691e-15      13990.5 
+      50   -635.62711   -667.67134    32.044236    1153.0465   -4533.0717   -669.45724     37.76997   -707.22721            0    1.7858939    9.6844521    2.2001656      13990.5 
+     100   -632.76337   -662.83035    30.066977    1081.8989   -3492.8736   -664.98454    39.197093   -704.18164            0    2.1541967    11.063962    2.1543406      13990.5 
+     150   -630.82538   -663.70056    32.875182    1182.9464   -74.330324   -666.12202    46.263665   -712.38569            0    2.4214607    11.739683    2.7558416      13990.5 
+     200    -629.1541   -664.54637     35.39227    1273.5187   -1707.5508   -666.87772    41.796197   -708.67391            0    2.3313445    10.594804    3.0025376      13990.5 
+     250   -627.86587   -662.60879    34.742918    1250.1531   -1258.7537   -665.21416    43.017024   -708.23118            0    2.6053655    10.576999    1.8400986      13990.5 
+     300   -627.10755   -664.12897    37.021419    1332.1403   -1891.3466   -666.39618    40.769593   -707.16577            0    2.2672094     9.412943    1.2434258      13990.5 
+     350   -626.27558   -665.04303    38.767448    1394.9676   -1436.8514   -667.47081    41.854746   -709.32556            0    2.4277827    10.304721     1.977594      13990.5 
+     400   -625.55098   -661.86388    36.312896    1306.6455   -331.92076    -664.4632    44.426542   -708.88975            0     2.599325    11.081635    2.1734468      13990.5 
+     450   -624.88626   -661.07359    36.187328    1302.1272    -2325.834    -663.6031    39.662697   -703.26579            0    2.5295037    9.9810051    1.3068929      13990.5 
+     500   -623.87093   -660.24145    36.370525    1308.7192    410.85324   -662.86944    45.869201   -708.73864            0    2.6279856    10.592785    1.8162326      13990.5 
+Loop time of 11.7065 on 1 procs for 500 steps with 432 atoms
+
+Performance: 7.381 ns/day, 3.252 hours/ns, 42.711 timesteps/s
+100.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 11.535     | 11.535     | 11.535     |   0.0 | 98.54
+Bond    | 0.001443   | 0.001443   | 0.001443   |   0.0 |  0.01
+Neigh   | 0.10952    | 0.10952    | 0.10952    |   0.0 |  0.94
+Comm    | 0.048796   | 0.048796   | 0.048796   |   0.0 |  0.42
+Output  | 0.00038695 | 0.00038695 | 0.00038695 |   0.0 |  0.00
+Modify  | 0.0067511  | 0.0067511  | 0.0067511  |   0.0 |  0.06
+Other   |            | 0.004243   |            |       |  0.04
+
+Nlocal:    432 ave 432 max 432 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    9328 ave 9328 max 9328 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    297280 ave 297280 max 297280 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 297280
+Ave neighs/atom = 688.148
+Ave special neighs/atom = 1
+Neighbor list builds = 21
+Dangerous builds = 0
+
+unfix thermoberendsen
+
+# ------------------------ Dynamic Run -------------------------------
+
+run 1000
+Per MPI rank memory allocation (min/avg/max) = 8.59 | 8.59 | 8.59 Mbytes
+Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume 
+     500   -623.87093   -660.24145    36.370525    1308.7192    410.85324   -662.86944    45.869201   -708.73864            0    2.6279856    10.592785    1.8162326      13990.5 
+     550   -623.95762   -659.99899    36.041371    1296.8752   -442.62446   -662.68699    44.004419   -706.69141            0    2.6879934    10.936057    1.7358509      13990.5 
+     600   -624.04214   -661.21748    37.175332    1337.6785    47.615854   -663.76133    45.009742   -708.77107            0    2.5438549    11.431366    2.1185464      13990.5 
+     650   -623.98279   -661.85255     37.86976    1362.6661   -1708.3823   -664.31138    40.933446   -705.24482            0    2.4588298    9.7960191    1.4159904      13990.5 
+     700   -624.02941   -661.57484     37.54543    1350.9958   -124.09512   -663.95322    44.666402   -708.61962            0    2.3783801    10.518712    2.0854584      13990.5 
+     750   -624.03935   -661.11619    37.076837    1334.1344   -1163.2721   -663.82553      42.1219   -705.94743            0    2.7093404    10.634859    1.9381654      13990.5 
+     800   -623.98709   -659.43647     35.44938    1275.5737   -285.65702   -662.23782    44.650233   -706.88805            0    2.8013482     11.83459    3.4506407      13990.5 
+     850   -623.96081   -661.77923    37.818422    1360.8188   -814.33212   -664.38161    42.860208   -707.24181            0    2.6023728    10.766451    2.9211132      13990.5 
+     900   -623.96136   -662.98419    39.022831     1404.157    308.13105   -665.65877    45.053188   -710.71195            0    2.6745786    11.028799    1.8494322      13990.5 
+     950   -623.91048   -660.63828      36.7278     1321.575   -445.31526   -663.79806     43.94833   -707.74639            0    3.1597729    11.852051    1.8238458      13990.5 
+    1000   -623.90541   -661.03474    37.129332    1336.0233    522.57958   -663.50212    45.863261   -709.36538            0    2.4673781    10.949291    1.8614095      13990.5 
+    1050   -624.01134   -660.99076    36.979422    1330.6291   -1710.2829   -663.67762    41.056102   -704.73372            0    2.6868521     10.86533    2.2154165      13990.5 
+    1100   -624.02639   -660.86661    36.840225    1325.6204    1151.0236    -663.5066    47.207185   -710.71378            0    2.6399847    11.298044    1.8900703      13990.5 
+    1150   -624.04418   -661.29819    37.254006    1340.5094    161.09847   -663.73735    45.222561   -708.95991            0    2.4391651    11.261119    2.2245673      13990.5 
+    1200   -624.08628   -662.39932    38.313043    1378.6168   -1405.0927   -664.93121    41.728112   -706.65933            0    2.5318927    10.458041    2.1153159      13990.5 
+    1250   -624.03036   -661.25661    37.226248    1339.5106    63.407721   -663.72719    45.086961   -708.81415            0    2.4705832    11.766021    2.2297809      13990.5 
+    1300   -623.97475   -661.55998    37.585229    1352.4278   -763.66781   -663.80645    42.870244    -706.6767            0    2.2464703    10.098322    1.7614596      13990.5 
+    1350   -623.93123   -660.30786    36.376631    1308.9389   -2389.8052   -663.04725    39.749029   -702.79628            0    2.7393936    10.879364    2.6622521      13990.5 
+    1400   -623.86468   -658.44644    34.581765    1244.3543    2262.7586   -660.95368     50.10608   -711.05976            0    2.5072434    11.528291    1.7861906      13990.5 
+    1450   -623.85494   -661.47216    37.617217    1353.5789   -1435.0174   -664.13587    41.701767   -705.83764            0    2.6637191    11.297444    2.0038345      13990.5 
+    1500   -623.79928   -659.70124    35.901959    1291.8588   -198.39562   -662.57415    44.358482   -706.93263            0     2.872907    11.075746    2.2821286      13990.5 
+Loop time of 23.4119 on 1 procs for 1000 steps with 432 atoms
+
+Performance: 7.381 ns/day, 3.252 hours/ns, 42.713 timesteps/s
+99.9% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 23.069     | 23.069     | 23.069     |   0.0 | 98.54
+Bond    | 0.0029275  | 0.0029275  | 0.0029275  |   0.0 |  0.01
+Neigh   | 0.22821    | 0.22821    | 0.22821    |   0.0 |  0.97
+Comm    | 0.097941   | 0.097941   | 0.097941   |   0.0 |  0.42
+Output  | 0.00074033 | 0.00074033 | 0.00074033 |   0.0 |  0.00
+Modify  | 0.0042015  | 0.0042015  | 0.0042015  |   0.0 |  0.02
+Other   |            | 0.00865    |            |       |  0.04
+
+Nlocal:    432 ave 432 max 432 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    9388 ave 9388 max 9388 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    297212 ave 297212 max 297212 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 297212
+Ave neighs/atom = 687.991
+Ave special neighs/atom = 1
+Neighbor list builds = 45
+Dangerous builds = 0
+Total wall time: 0:00:35
diff --git a/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4 b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4
new file mode 100644
index 0000000000000000000000000000000000000000..84f854cf6797e74dc2c4ab25f75796b0baa9c9aa
--- /dev/null
+++ b/examples/coreshell/log.22Sep17.coreshell.wolf.g++.4
@@ -0,0 +1,190 @@
+LAMMPS (22 Sep 2017)
+# Testsystem for core-shell model compared to Mitchel and Finchham
+# Shamelessly copied from in.coreshell.dsf but this script uses Wolf summation.
+
+# ------------------------ INITIALIZATION ----------------------------
+
+units 		metal
+dimension	3
+boundary	p	p	p
+atom_style	full
+
+# ----------------------- ATOM DEFINITION ----------------------------
+
+fix csinfo all property/atom i_CSID
+read_data data.coreshell fix csinfo NULL CS-Info
+  orthogonal box = (0 0 0) to (24.096 24.096 24.096)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  432 atoms
+  scanning bonds ...
+  1 = max bonds/atom
+  reading bonds ...
+  216 bonds
+  1 = max # of 1-2 neighbors
+  0 = max # of 1-3 neighbors
+  0 = max # of 1-4 neighbors
+  1 = max # of special neighbors
+
+group cores type 1 2
+216 atoms in group cores
+group shells type 3 4
+216 atoms in group shells
+
+neighbor 2.0 bin
+comm_modify vel yes
+
+# ------------------------ FORCE FIELDS ------------------------------
+
+pair_style   born/coul/wolf/cs 0.1 20.0 20.0    # A, rho, sigma=0, C, D
+pair_coeff   * *      0.0 1.000   0.00  0.00   0.00
+pair_coeff   3 3    487.0 0.23768 0.00  1.05   0.50 #Na-Na
+pair_coeff   3 4 145134.0 0.23768 0.00  6.99   8.70 #Na-Cl
+pair_coeff   4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
+
+bond_style harmonic
+bond_coeff 1 63.014 0.0
+bond_coeff 2 25.724 0.0
+
+# ------------------------ Equilibration Run -------------------------------
+
+reset_timestep 0
+
+thermo 50
+thermo_style custom step etotal pe ke temp press              epair evdwl ecoul elong ebond fnorm fmax vol
+
+compute CSequ all temp/cs cores shells
+
+# output via chunk method
+
+#compute prop all property/atom i_CSID
+#compute cs_chunk all chunk/atom c_prop
+#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
+#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
+
+thermo_modify temp CSequ
+
+# velocity bias option
+
+velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 22
+  ghost atom cutoff = 22
+  binsize = 11, bins = 3 3 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair born/coul/wolf/cs, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+velocity all scale 1427 temp CSequ
+
+fix thermoberendsen all temp/berendsen 1427 1427 0.4
+fix nve all nve
+fix_modify thermoberendsen temp CSequ
+
+# 2 fmsec timestep
+
+timestep 0.002
+run 500
+Per MPI rank memory allocation (min/avg/max) = 6.881 | 6.881 | 6.881 Mbytes
+Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume 
+       0   -637.41039   -677.06805    39.657659         1427   -21302.622   -677.06805    1.6320365   -678.70009            0            0 1.3530839e-14 3.6429193e-15      13990.5 
+      50   -635.62704   -667.67108    32.044047    1153.0397   -4532.6842   -669.45828    37.771018    -707.2293            0     1.787201    9.6848095    2.2028349      13990.5 
+     100   -632.76343   -662.83014    30.066711    1081.8893   -3493.0827   -664.98212    39.195967   -704.17809            0     2.151977    11.060773    2.1578583      13990.5 
+     150   -630.82533   -663.70172    32.876385    1182.9897   -75.706974   -666.12608    46.261038   -712.38712            0    2.4243598    11.746728    2.7663319      13990.5 
+     200   -629.15463   -664.55009    35.395466    1273.6337   -1707.9185   -666.88117    41.794868   -708.67604            0     2.331082    10.596229    3.0031523      13990.5 
+     250   -627.86625   -662.60876    34.742511    1250.1384   -1263.5726     -665.214    43.005742   -708.21974            0    2.6052329    10.572641     1.841604      13990.5 
+     300   -627.10829   -664.12159    37.013298    1331.8481   -1884.8587   -666.39136    40.786141    -707.1775            0    2.2697693    9.4160685    1.2472271      13990.5 
+     350    -626.2729   -665.01858    38.745682    1394.1844   -1433.1302   -667.44315    41.864785   -709.30793            0    2.4245625    10.312641    1.9815612      13990.5 
+     400   -625.54274   -661.84438    36.301638    1306.2404   -355.45544    -664.4483    44.370671   -708.81897            0    2.6039276    11.076154    2.1819703      13990.5 
+     450   -624.88802   -661.12299    36.234964    1303.8413   -2356.6101    -663.6463     39.57847   -703.22477            0     2.523314    9.8995886    1.3113549      13990.5 
+     500   -623.86344   -660.20235    36.338906    1307.5814    462.72862    -662.8515    45.983055   -708.83455            0    2.6491489    10.523732    1.6709561      13990.5 
+Loop time of 4.63313 on 4 procs for 500 steps with 432 atoms
+
+Performance: 18.648 ns/day, 1.287 hours/ns, 107.919 timesteps/s
+99.4% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.8133     | 3.5934     | 4.4255     |  36.3 | 77.56
+Bond    | 0.00042245 | 0.00050305 | 0.00059825 |   0.0 |  0.01
+Neigh   | 0.0411     | 0.041561   | 0.04202    |   0.2 |  0.90
+Comm    | 0.15622    | 0.9884     | 1.7686     |  69.2 | 21.33
+Output  | 0.00028311 | 0.00031345 | 0.00040011 |   0.0 |  0.01
+Modify  | 0.0039899  | 0.00403    | 0.0040654  |   0.0 |  0.09
+Other   |            | 0.004911   |            |       |  0.11
+
+Nlocal:    108 ave 112 max 106 min
+Histogram: 2 0 0 1 0 0 0 0 0 1
+Nghost:    6531.75 ave 6611 max 6460 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Neighs:    74316.2 ave 75641 max 72780 min
+Histogram: 1 0 0 1 0 0 0 0 1 1
+
+Total # of neighbors = 297265
+Ave neighs/atom = 688.113
+Ave special neighs/atom = 1
+Neighbor list builds = 21
+Dangerous builds = 0
+
+unfix thermoberendsen
+
+# ------------------------ Dynamic Run -------------------------------
+
+run 1000
+Per MPI rank memory allocation (min/avg/max) = 6.884 | 7.01 | 7.138 Mbytes
+Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume 
+     500   -623.86344   -660.20235    36.338906    1307.5814    462.72862    -662.8515    45.983055   -708.83455            0    2.6491489    10.523732    1.6709561      13990.5 
+     550   -623.95339   -660.07946    36.126069    1299.9229   -362.67288    -662.8047    44.180832   -706.98553            0     2.725239     11.07199    2.0461377      13990.5 
+     600   -624.04508   -661.27298    37.227902    1339.5701    110.39823   -663.90927    45.166569   -709.07584            0    2.6362911    11.747923    2.1846828      13990.5 
+     650    -623.9608   -661.50573    37.544934    1350.9779   -1594.4701   -663.91531    41.226571   -705.14188            0    2.4095736    10.022027    1.6264014      13990.5 
+     700   -624.00658   -663.55636    39.549777    1423.1181    -588.9804   -665.88666    43.124023   -709.01068            0    2.3302979     9.924587    2.1027837      13990.5 
+     750   -623.99813   -659.97695    35.978816    1294.6243    -364.1415   -662.75959    43.973556   -706.73315            0     2.782646    11.487832    1.8799695      13990.5 
+     800   -624.01235   -661.04908    37.036728    1332.6911   -85.655852   -663.49204    44.570117   -708.06215            0    2.4429547    11.213149    1.6792926      13990.5 
+     850   -624.02581   -659.39933    35.373519     1272.844   -1090.9179   -662.11969    42.375064   -704.49476            0    2.7203673    11.214629    2.2907474      13990.5 
+     900   -624.09244   -663.33386    39.241421    1412.0225    303.76207   -666.00593    45.332458   -711.33839            0    2.6720667    11.897188    2.0599033      13990.5 
+     950   -624.04544   -661.11578    37.070341    1333.9007    1637.6438   -663.61305     48.65892   -712.27197            0      2.49727    12.343774     2.429225      13990.5 
+    1000   -623.95457   -661.51816    37.563586    1351.6491   -851.77182   -663.83594    42.813468   -706.64941            0     2.317782    10.251422    1.6922864      13990.5 
+    1050   -623.98731   -662.01988    38.032562    1368.5242   -2123.9075   -664.72609    39.917382   -704.64348            0     2.706218     9.600838    1.7365559      13990.5 
+    1100    -623.9653   -659.32023    35.354929    1272.1751   -436.90576   -662.14454    44.057776   -706.20232            0    2.8243167    11.267546    2.6807602      13990.5 
+    1150   -624.02273   -665.13902    41.116293     1479.486   -128.13268    -667.4769    44.013761   -711.49066            0    2.3378768    10.406604    1.5102324      13990.5 
+    1200   -623.97328   -663.08042    39.107135    1407.1905   -539.63284   -665.67946    43.319308   -708.99877            0     2.599042    10.632657    1.4608707      13990.5 
+    1250   -623.92529   -661.81902    37.893723    1363.5284    1708.0621   -664.63624    48.396874   -713.03311            0    2.8172251    11.369352    1.8836979      13990.5 
+    1300   -623.99283   -662.19734    38.204509    1374.7114   -820.77291   -664.34556    42.656202   -707.00176            0    2.1482151    10.186898    1.7081329      13990.5 
+    1350   -623.99798   -658.70752    34.709535    1248.9519    776.28486   -662.01647    46.925412   -708.94188            0    3.3089536    12.457581    2.5027978      13990.5 
+    1400   -623.96941   -662.15959    38.190183    1374.1959    880.21756   -664.90452    46.628183    -711.5327            0    2.7449282    11.918894    3.0466188      13990.5 
+    1450   -623.95068   -662.25435    38.303671    1378.2795   -391.56711    -664.7631    43.644066   -708.40716            0    2.5087493    10.465803    1.8744864      13990.5 
+    1500   -624.00637   -661.48756    37.481185     1348.684    430.69453   -664.32151    45.704366   -710.02587            0    2.8339501    11.650821    2.0752813      13990.5 
+Loop time of 6.7003 on 4 procs for 1000 steps with 432 atoms
+
+Performance: 25.790 ns/day, 0.931 hours/ns, 149.247 timesteps/s
+99.1% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.9613     | 6.0752     | 6.2064     |   4.1 | 90.67
+Bond    | 0.00087095 | 0.00088993 | 0.00091671 |   0.0 |  0.01
+Neigh   | 0.06494    | 0.064964   | 0.064987   |   0.0 |  0.97
+Comm    | 0.41743    | 0.54863    | 0.6626     |  13.8 |  8.19
+Output  | 0.00053867 | 0.00059268 | 0.00074867 |   0.0 |  0.01
+Modify  | 0.0013288  | 0.0013592  | 0.0013841  |   0.1 |  0.02
+Other   |            | 0.008613   |            |       |  0.13
+
+Nlocal:    108 ave 116 max 102 min
+Histogram: 2 0 0 0 0 0 0 1 0 1
+Nghost:    6504.5 ave 6624 max 6380 min
+Histogram: 1 1 0 0 0 0 0 0 1 1
+Neighs:    74267 ave 79230 max 70993 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+
+Total # of neighbors = 297068
+Ave neighs/atom = 687.657
+Ave special neighs/atom = 1
+Neighbor list builds = 45
+Dangerous builds = 0
+Total wall time: 0:00:11
diff --git a/examples/melt/in.melt b/examples/melt/in.melt
deleted file mode 100644
index bf6eb735472ef5283346090c472167e01e540ed8..0000000000000000000000000000000000000000
--- a/examples/melt/in.melt
+++ /dev/null
@@ -1,33 +0,0 @@
-# 3d Lennard-Jones melt
-
-units		lj
-atom_style	atomic
-
-lattice		fcc 0.8442
-region		box block 0 $L 0 $L 0 $L
-create_box	1 box
-create_atoms	1 box
-mass		1 1.0
-
-velocity	all create 3.0 87287
-
-pair_style	lj/cut 2.5
-pair_coeff	1 1 1.0 1.0 2.5
-
-neighbor	0.3 bin
-neigh_modify	every 20 delay 0 check no
-
-fix		1 all nve
-
-#dump		id all atom 50 dump.melt
-
-#dump		2 all image 25 image.*.jpg type type &
-#		axes yes 0.8 0.02 view 60 -30
-#dump_modify	2 pad 3
-
-#dump		3 all movie 25 movie.mpg type type &
-#		axes yes 0.8 0.02 view 60 -30
-#dump_modify	3 pad 3
-
-thermo		50
-run		250
diff --git a/examples/peptide/in.peptide b/examples/peptide/in.peptide
deleted file mode 100644
index cdf21636943726560d31b7f38982d6243bac62b8..0000000000000000000000000000000000000000
--- a/examples/peptide/in.peptide
+++ /dev/null
@@ -1,42 +0,0 @@
-# Solvated 5-mer peptide
-
-units		real
-atom_style	full
-
-pair_style	lj/charmm/coul/long 8.0 10.0 10.0
-bond_style      harmonic
-angle_style     charmm
-dihedral_style  charmm
-improper_style  harmonic
-kspace_style	pppm 0.0001
-
-read_data	data.peptide
-
-neighbor	2.0 bin
-neigh_modify	delay 5
-
-timestep	2.0
-
-thermo_style	multi
-thermo		50
-
-fix		1 all nvt temp 275.0 275.0 100.0 tchain 1
-fix		2 all shake 0.0001 10 100 b 4 6 8 10 12 14 18 a 31
-
-group		peptide type <= 12
-
-dump		1 peptide atom 10 dump.peptide
-
-#dump		2 peptide image 25 image.*.jpg type type &
-#		axes yes 0.8 0.02 view 60 -30 bond atom 0.5
-#dump_modify	2 pad 3
-
-#dump		3 peptide movie 25 movie.mpg type type &
-#		axes yes 0.8 0.02 view 60 -30 bond atom 0.5
-#dump_modify	3 pad 3
-
-#compute		bnd all property/local btype batom1 batom2
-#dump		2 peptide local 300 dump.bond index c_bnd[1] c_bnd[2] c_bnd[3]
-
-run		300
-
diff --git a/examples/rigid/bodies.txt b/examples/rigid/bodies.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ddca2c7539b57a1eed2109d954bd5903bc7a765d
--- /dev/null
+++ b/examples/rigid/bodies.txt
@@ -0,0 +1,83 @@
+81
+1 1
+2 1
+3 1
+4 1
+5 1
+6 1
+7 1
+8 1
+9 1
+10 2
+11 2
+12 2
+13 2
+14 2
+15 2
+16 2
+17 2
+18 2
+19 3
+20 3
+21 3
+22 3
+23 3
+24 3
+25 3
+26 3
+27 3
+28 4
+29 4
+30 4
+31 4
+32 4
+33 4
+34 4
+35 4
+36 4
+37 5
+38 5
+39 5
+40 5
+41 5
+42 5
+43 5
+44 5
+45 5
+46 6
+47 6
+48 6
+49 6
+50 6
+51 6
+52 6
+53 6
+54 6
+55 7
+56 7
+57 7
+58 7
+59 7
+60 7
+61 7
+62 7
+63 7
+64 8
+65 8
+66 8
+67 8
+68 8
+69 8
+70 8
+71 8
+72 8
+73 9
+74 9
+75 9
+76 9
+77 9
+78 9
+79 9
+80 9
+81 9
+
diff --git a/examples/rigid/data.rigid-property b/examples/rigid/data.rigid-property
new file mode 100644
index 0000000000000000000000000000000000000000..66fc42d2d2cf6b0e27a1ca0cb4cedbac889b0d75
--- /dev/null
+++ b/examples/rigid/data.rigid-property
@@ -0,0 +1,185 @@
+# LAMMPS data file for rigid bodies
+
+81	atoms		
+1	atom types	
+
+-12 12 xlo xhi
+-12 12 ylo yhi
+-12 12 zlo zhi
+
+Masses			
+
+1 1		
+
+Pair Coeffs			
+
+1 1 1		
+
+Atoms				
+
+1	1	0	0	0
+2	1	0	1	0
+3	1	0	0.5	0
+4	1	0.5	0	0
+5	1	0.5	0.5	1
+6	1	1	0.5	0
+7	1	0.5	1	0
+8	1	1	0	0
+9	1	1	1	0
+10	1	2	1	0
+11	1	1	2	0
+12	1	1.5	2	0
+13	1	1.5	1	0
+14	1	1	1.5	0
+15	1	1.5	1.5	1
+16	1	2	1.5	0
+17	1	2	2	0
+18	1	2	3	0
+19	1	2	2.5	0
+20	1	2.5	2	0
+21	1	2.5	2.5	1
+22	1	3	2.5	0
+23	1	2.5	3	0
+24	1	3	2	0
+25	1	3	3	0
+26	1	4	3	0
+27	1	3	4	0
+28	1	3.5	4	0
+29	1	3.5	3	0
+30	1	3	3.5	0
+31	1	3.5	3.5	1
+32	1	4	3.5	0
+33	1	4	4	0
+34	1	4	5	0
+35	1	4	4.5	0
+36	1	4.5	4	0
+37	1	4.5	4.5	1
+38	1	5	4.5	0
+39	1	4.5	5	0
+40	1	5	4	0
+41	1	5	5	0
+42	1	6	5	0
+43	1	5	6	0
+44	1	5.5	6	0
+45	1	5.5	5	0
+46	1	5	5.5	0
+47	1	5.5	5.5	1
+48	1	6	5.5	0
+49	1	6	6	0
+50	1	6	7	0
+51	1	6	6.5	0
+52	1	6.5	6	0
+53	1	6.5	6.5	1
+54	1	7	6.5	0
+55	1	6.5	7	0
+56	1	7	6	0
+57	1	7	7	0
+58	1	8	7	0
+59	1	7	8	0
+60	1	7.5	8	0
+61	1	7.5	7	0
+62	1	7	7.5	0
+63	1	7.5	7.5	1
+64	1	8	7.5	0
+65	1	8	8	0
+66	1	8	9	0
+67	1	8	8.5	0
+68	1	8.5	8	0
+69	1	8.5	8.5	1
+70	1	9	8.5	0
+71	1	8.5	9	0
+72	1	9	8	0
+73	1	9	9	0
+74	1	10	9	0
+75	1	9	10	0
+76	1	9.5	10	0
+77	1	9.5	9	0
+78	1	9	9.5	0
+79	1	9.5	9.5	1
+80	1	10	9.5	0
+81	1	10	10	0
+
+Bodies
+
+1 1
+2 1
+3 1
+4 1
+5 1
+6 1
+7 1
+8 1
+9 1
+10 2
+11 2
+12 2
+13 2
+14 2
+15 2
+16 2
+17 2
+18 2
+19 3
+20 3
+21 3
+22 3
+23 3
+24 3
+25 3
+26 3
+27 3
+28 4
+29 4
+30 4
+31 4
+32 4
+33 4
+34 4
+35 4
+36 4
+37 5
+38 5
+39 5
+40 5
+41 5
+42 5
+43 5
+44 5
+45 5
+46 6
+47 6
+48 6
+49 6
+50 6
+51 6
+52 6
+53 6
+54 6
+55 7
+56 7
+57 7
+58 7
+59 7
+60 7
+61 7
+62 7
+63 7
+64 8
+65 8
+66 8
+67 8
+68 8
+69 8
+70 8
+71 8
+72 8
+73 9
+74 9
+75 9
+76 9
+77 9
+78 9
+79 9
+80 9
+81 9
+
diff --git a/examples/rigid/in.rigid b/examples/rigid/in.rigid-atomfile
similarity index 86%
rename from examples/rigid/in.rigid
rename to examples/rigid/in.rigid-atomfile
index 4482395a6c1f7a918cea7c4eb98c386c1da3a989..8817386c79f7df174b1a538b107b88da5897d5b4 100644
--- a/examples/rigid/in.rigid
+++ b/examples/rigid/in.rigid-atomfile
@@ -2,6 +2,7 @@
 
 units		lj
 atom_style	atomic
+atom_modify  map array
 
 pair_style	lj/cut 2.5
 
@@ -9,6 +10,7 @@ read_data	data.rigid
 
 velocity 	all create 100.0 4928459
 
+
 # unconnected bodies
 
 group		clump1 id <> 1 9
@@ -21,11 +23,8 @@ group		clump7 id <> 55 63
 group		clump8 id <> 64 72
 group		clump9 id <> 73 81
 
-#fix 1 all rigid group 9 clump1 clump2 clump3 clump4 clump5 &
-#	  	      clump6 clump7 clump8 clump9
-
-fix 1 all rigid/small group 9 clump1 clump2 clump3 clump4 clump5 &
-	  	      clump6 clump7 clump8 clump9
+variable bodies atomfile bodies.txt
+fix 1 all rigid custom v_bodies
 
 # 1 chain of connected bodies
 
@@ -67,9 +66,9 @@ neigh_modify	exclude group clump7 clump7
 neigh_modify	exclude group clump8 clump8
 neigh_modify	exclude group clump9 clump9
 
-thermo		10000
+thermo		100
 
-dump		1 all atom 50 dump.rigid
+#dump		1 all atom 50 dump.rigid
 
 #dump		2 all image 100 image.*.jpg type type &
 #		axes yes 0.8 0.02 view 60 -30
@@ -80,5 +79,5 @@ dump		1 all atom 50 dump.rigid
 #dump_modify	3 pad 5
 
 timestep 	0.0001
-thermo		50000
-run		100000
+thermo		50
+run		10000
diff --git a/examples/rigid/in.rigid-atomvar b/examples/rigid/in.rigid-atomvar
new file mode 100644
index 0000000000000000000000000000000000000000..994ab1d28432db3a934cac421f419d6d5adbf127
--- /dev/null
+++ b/examples/rigid/in.rigid-atomvar
@@ -0,0 +1,83 @@
+# Simple rigid body system; rigid bodies are defined by an atom-style variable (fix rigid custom)
+
+units		lj
+atom_style	atomic
+atom_modify  map array
+
+pair_style	lj/cut 2.5
+
+read_data	data.rigid
+
+velocity 	all create 100.0 4928459
+
+
+# unconnected bodies: groups clump1..clump9 each cover their own range of atom IDs (no ID shared)
+
+group		clump1 id <> 1 9
+group		clump2 id <> 10 18
+group		clump3 id <> 19 27
+group		clump4 id <> 28 36
+group		clump5 id <> 37 45
+group		clump6 id <> 46 54
+group		clump7 id <> 55 63
+group		clump8 id <> 64 72
+group		clump9 id <> 73 81
+
+variable bodies atom 1.0*gmask(clump1)+2.0*gmask(clump2)+3.0*gmask(clump3)+4.0*gmask(clump4)+5.0*gmask(clump5)+6.0*gmask(clump6)+7.0*gmask(clump7)+8.0*gmask(clump8)+9.0*gmask(clump9)
+fix 1 all rigid custom v_bodies	# per-atom body assignment is taken from variable "bodies"
+
+# 1 chain of connected bodies (disabled alternative: neighboring ID ranges share one atom)
+
+#group		clump1 id <> 1 9
+#group		clump2 id <> 9 18
+#group		clump3 id <> 18 27
+#group		clump4 id <> 27 36
+#group		clump5 id <> 36 45
+#group		clump6 id <> 45 54
+#group		clump7 id <> 54 63
+#group		clump8 id <> 63 72
+#group		clump9 id <> 72 81
+
+#fix 1 all poems group clump1 clump2 clump3 clump4 clump5 &
+#	  	      clump6 clump7 clump8 clump9
+
+# 2 chains of connected bodies (disabled alternative: no atom shared between clump4 and clump5)
+
+#group		clump1 id <> 1 9
+#group		clump2 id <> 9 18
+#group		clump3 id <> 18 27
+#group		clump4 id <> 27 36
+#group		clump5 id <> 37 45
+#group		clump6 id <> 45 54
+#group		clump7 id <> 54 63
+#group		clump8 id <> 63 72
+#group		clump9 id <> 72 81
+
+#fix 1 all poems group clump1 clump2 clump3 clump4
+#fix 2 all poems group clump5 clump6 clump7 clump8 clump9
+
+neigh_modify	exclude group clump1 clump1	# exclude pairs inside one clump (repeated for each clump below)
+neigh_modify	exclude group clump2 clump2
+neigh_modify	exclude group clump3 clump3
+neigh_modify	exclude group clump4 clump4
+neigh_modify	exclude group clump5 clump5
+neigh_modify	exclude group clump6 clump6
+neigh_modify	exclude group clump7 clump7
+neigh_modify	exclude group clump8 clump8
+neigh_modify	exclude group clump9 clump9
+
+thermo		100	# superseded by the "thermo 50" issued just before the run
+
+#dump		1 all atom 50 dump.rigid
+
+#dump		2 all image 100 image.*.jpg type type &
+#		axes yes 0.8 0.02 view 60 -30
+#dump_modify	2 pad 5
+
+#dump		3 all movie 100 movie.mpg type type &
+#		axes yes 0.8 0.02 view 60 -30
+#dump_modify	3 pad 5
+
+timestep 	0.0001
+thermo		50
+run		10000
diff --git a/examples/rigid/in.rigid-property b/examples/rigid/in.rigid-property
new file mode 100644
index 0000000000000000000000000000000000000000..53d62776e6764c4e22573324f4bc69da8cb79d7a
--- /dev/null
+++ b/examples/rigid/in.rigid-property
@@ -0,0 +1,85 @@
+# Simple rigid body system
+
+units		lj
+atom_style	atomic
+atom_modify  map array
+
+pair_style	lj/cut 2.5
+
+fix 0 all property/atom i_bodies
+
+read_data	data.rigid-property fix 0 NULL Bodies
+
+velocity 	all create 100.0 4928459
+
+
+# unconnected bodies: groups feed the neigh_modify exclusions below; fix rigid uses i_bodies
+
+group		clump1 id <> 1 9
+group		clump2 id <> 10 18
+group		clump3 id <> 19 27
+group		clump4 id <> 28 36
+group		clump5 id <> 37 45
+group		clump6 id <> 46 54
+group		clump7 id <> 55 63
+group		clump8 id <> 64 72
+group		clump9 id <> 73 81
+
+# assemble bodies from per-atom custom integer property bodies
+fix 1 all rigid custom i_bodies
+
+# 1 chain of connected bodies
+
+#group		clump1 id <> 1 9
+#group		clump2 id <> 9 18
+#group		clump3 id <> 18 27
+#group		clump4 id <> 27 36
+#group		clump5 id <> 36 45
+#group		clump6 id <> 45 54
+#group		clump7 id <> 54 63
+#group		clump8 id <> 63 72
+#group		clump9 id <> 72 81
+
+#fix 1 all poems group clump1 clump2 clump3 clump4 clump5 &
+#	  	      clump6 clump7 clump8 clump9
+
+# 2 chains of connected bodies
+
+#group		clump1 id <> 1 9
+#group		clump2 id <> 9 18
+#group		clump3 id <> 18 27
+#group		clump4 id <> 27 36
+#group		clump5 id <> 37 45
+#group		clump6 id <> 45 54
+#group		clump7 id <> 54 63
+#group		clump8 id <> 63 72
+#group		clump9 id <> 72 81
+
+#fix 1 all poems group clump1 clump2 clump3 clump4
+#fix 2 all poems group clump5 clump6 clump7 clump8 clump9
+
+neigh_modify	exclude group clump1 clump1
+neigh_modify	exclude group clump2 clump2
+neigh_modify	exclude group clump3 clump3
+neigh_modify	exclude group clump4 clump4
+neigh_modify	exclude group clump5 clump5
+neigh_modify	exclude group clump6 clump6
+neigh_modify	exclude group clump7 clump7
+neigh_modify	exclude group clump8 clump8
+neigh_modify	exclude group clump9 clump9
+
+thermo		100
+
+#dump		1 all atom 50 dump.rigid
+
+#dump		2 all image 100 image.*.jpg type type &
+#		axes yes 0.8 0.02 view 60 -30
+#dump_modify	2 pad 5
+
+#dump		3 all movie 100 movie.mpg type type &
+#		axes yes 0.8 0.02 view 60 -30
+#dump_modify	3 pad 5
+
+timestep 	0.0001
+thermo		50
+run		10000
diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index f47403f771f2095928d528d6872b3644a9a164d3..e3fa4662e638ef1668a21259f7fb5c021c599e9a 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -12,7 +12,7 @@
 ifeq ($(COLVARS_DEBUG),)
 COLVARS_DEBUG_INCFLAGS =
 else
-COLVARS_DEBUG_INCFLAGS= -DCOLVARS_DEBUG
+COLVARS_DEBUG_INCFLAGS = -DCOLVARS_DEBUG
 endif
 
 COLVARS_INCFLAGS = $(COLVARS_DEBUG_INCFLAGS) $(COLVARS_PYTHON_INCFLAGS)
@@ -21,6 +21,7 @@ COLVARS_INCFLAGS = $(COLVARS_DEBUG_INCFLAGS) $(COLVARS_PYTHON_INCFLAGS)
 .SUFFIXES:
 .SUFFIXES: .cpp .o
 
+
 COLVARS_SRCS = \
         colvaratoms.cpp \
         colvarbias_abf.cpp \
@@ -45,21 +46,32 @@ COLVARS_SRCS = \
         colvartypes.cpp \
         colvarvalue.cpp
 
-COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o)
+LEPTON_SRCS = \
+	lepton/src/CompiledExpression.cpp lepton/src/ExpressionTreeNode.cpp \
+	lepton/src/ParsedExpression.cpp lepton/src/ExpressionProgram.cpp    \
+	lepton/src/Operation.cpp lepton/src/Parser.cpp
+
+# Object files of the bundled Lepton library.  Derived from LEPTON_SRCS so
+# that the source and object lists cannot drift apart when a file is added
+# or removed (same substitution already used for COLVARS_SRCS below).
+LEPTON_OBJS = $(LEPTON_SRCS:.cpp=.o)
+
+COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) $(LEPTON_OBJS)
 
-.cpp.o:
-	$(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) -c $<
+%.o: %.cpp
+	$(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) -Ilepton/include -DLEPTON -c -o $@ $<
 
 $(COLVARS_LIB):	Makefile.deps $(COLVARS_OBJS)
 	$(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS)
 
 
 Makefile.deps: $(COLVARS_SRCS)
 	@echo > $@
 	@for src in $^ ; do \
 	  obj=`basename $$src .cpp`.o ; \
-	  $(CXX) -MM $(COLVARS_INCFLAGS) \
+	  $(CXX) -MM $(COLVARS_INCFLAGS)  -Ilepton/include -DLEPTON \
 	    -MT '$$(COLVARS_OBJ_DIR)'$$obj $$src >> $@ ; \
 	  done
 
 include Makefile.deps
+include Makefile.lepton.deps # Hand-generated
diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps
index f463da5f86f2b40e5a42230fff856c32cd0f2349..e0c768dbc9b042b3ae14731b6319929af9a20705 100644
--- a/lib/colvars/Makefile.deps
+++ b/lib/colvars/Makefile.deps
@@ -4,73 +4,232 @@ $(COLVARS_OBJ_DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h \
  colvarparse.h colvaratoms.h colvardeps.h
 $(COLVARS_OBJ_DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \
- colvarparse.h colvardeps.h colvarbias_abf.h colvarbias.h colvargrid.h
+ colvarparse.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarbias_abf.h colvarbias.h colvargrid.h colvar_UIestimator.h
 $(COLVARS_OBJ_DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
- colvarbias_alb.h colvar.h colvarparse.h colvardeps.h colvarbias.h
+ colvarbias_alb.h colvar.h colvarparse.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarbias.h
 $(COLVARS_OBJ_DIR)colvarbias.o: colvarbias.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h \
- colvar.h colvarparse.h colvardeps.h
+ colvar.h colvarparse.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_histogram.o: colvarbias_histogram.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarbias_histogram.h \
- colvarbias.h colvargrid.h
+ colvarvalue.h colvar.h colvarparse.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarbias_histogram.h colvarbias.h colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_meta.o: colvarbias_meta.cpp colvar.h \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvarparse.h colvardeps.h colvarbias_meta.h colvarbias.h \
- colvargrid.h
+ colvarvalue.h colvarparse.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarbias_meta.h colvarbias.h colvargrid.h
 $(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
  colvarvalue.h colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \
- colvardeps.h
+ colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h
 $(COLVARS_OBJ_DIR)colvarcomp_angles.o: colvarcomp_angles.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \
- colvaratoms.h
+ colvarvalue.h colvar.h colvarparse.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h
 $(COLVARS_OBJ_DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
  colvarvalue.h colvarparse.h colvaratoms.h colvardeps.h colvar.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
  colvarcomp.h
 $(COLVARS_OBJ_DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \
- colvarparse.h colvardeps.h colvarcomp.h colvaratoms.h
+ colvarparse.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h
 $(COLVARS_OBJ_DIR)colvarcomp_distances.o: colvarcomp_distances.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \
- colvaratoms.h
+ colvarvalue.h colvarparse.h colvar.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h
 $(COLVARS_OBJ_DIR)colvarcomp_protein.o: colvarcomp_protein.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \
- colvaratoms.h
+ colvarvalue.h colvarparse.h colvar.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h
 $(COLVARS_OBJ_DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \
- colvaratoms.h
+ colvarvalue.h colvarparse.h colvar.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h
 $(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \
  colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvar.h \
- colvardeps.h colvarcomp.h colvaratoms.h colvarscript.h colvarbias.h
+ colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h colvarscript.h colvarbias.h
 $(COLVARS_OBJ_DIR)colvardeps.o: colvardeps.cpp colvardeps.h \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
  colvarvalue.h colvarparse.h
 $(COLVARS_OBJ_DIR)colvargrid.o: colvargrid.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
- colvarparse.h colvar.h colvardeps.h colvarcomp.h colvaratoms.h \
- colvargrid.h
+ colvarparse.h colvar.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarcomp.h colvaratoms.h colvargrid.h
 $(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
- colvarparse.h colvar.h colvardeps.h colvarbias.h colvarbias_abf.h \
- colvargrid.h colvarbias_alb.h colvarbias_histogram.h colvarbias_meta.h \
- colvarbias_restraint.h colvarscript.h colvaratoms.h
+ colvarparse.h colvar.h colvardeps.h lepton/include/Lepton.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
+ colvarbias.h colvarbias_abf.h colvargrid.h colvar_UIestimator.h \
+ colvarbias_alb.h colvarbias_histogram.h colvarbias_meta.h \
+ colvarbias_restraint.h colvarscript.h colvaratoms.h colvarcomp.h
 $(COLVARS_OBJ_DIR)colvarparse.o: colvarparse.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
  colvarparse.h
 $(COLVARS_OBJ_DIR)colvarproxy.o: colvarproxy.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
  colvarscript.h colvarbias.h colvar.h colvarparse.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h \
  colvaratoms.h
 $(COLVARS_OBJ_DIR)colvarscript.o: colvarscript.cpp colvarscript.h \
  colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \
- colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h
+ colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h \
+ lepton/include/Lepton.h lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h lepton/include/lepton/Parser.h
 $(COLVARS_OBJ_DIR)colvartypes.o: colvartypes.cpp colvarmodule.h \
  colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \
  colvarparse.h
diff --git a/lib/colvars/Makefile.lepton.deps b/lib/colvars/Makefile.lepton.deps
new file mode 100644
index 0000000000000000000000000000000000000000..93c3912384b7bc41f4900b481903aa0e6740dd3b
--- /dev/null
+++ b/lib/colvars/Makefile.lepton.deps
@@ -0,0 +1,40 @@
+lepton/src/CompiledExpression.o: lepton/src/CompiledExpression.cpp \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h
+lepton/src/ExpressionProgram.o: lepton/src/ExpressionProgram.cpp \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h
+lepton/src/ExpressionTreeNode.o: lepton/src/ExpressionTreeNode.cpp \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/Exception.h lepton/include/lepton/Operation.h \
+ lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h
+lepton/src/Operation.o: lepton/src/Operation.cpp \
+ lepton/include/lepton/Operation.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ExpressionTreeNode.h lepton/src/MSVC_erfc.h
+lepton/src/ParsedExpression.o: lepton/src/ParsedExpression.cpp \
+ lepton/include/lepton/ParsedExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CompiledExpression.h \
+ lepton/include/lepton/ExpressionProgram.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h
+lepton/src/Parser.o: lepton/src/Parser.cpp \
+ lepton/include/lepton/Parser.h lepton/include/lepton/windowsIncludes.h \
+ lepton/include/lepton/CustomFunction.h lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ExpressionTreeNode.h \
+ lepton/include/lepton/Operation.h lepton/include/lepton/CustomFunction.h \
+ lepton/include/lepton/Exception.h \
+ lepton/include/lepton/ParsedExpression.h \
+ lepton/include/lepton/ExpressionTreeNode.h
diff --git a/lib/colvars/README b/lib/colvars/README
index 5df9612dfa3b6f73af79137701fdfae4bb90158b..087528748b6b2e80b6d76adcd1bb95b21b42c020 100644
--- a/lib/colvars/README
+++ b/lib/colvars/README
@@ -47,6 +47,10 @@ correct for your system, else the LAMMPS build will likely fail.
 If you want to set a debug flag recognized by the library, the
 settings in Makefile.common should work.
 
+Note: some Colvars functions use the Lepton mathematical expression parser,
+which is bundled here (no extra build steps are needed).  For more details, see:
+  https://simtk.org/projects/lepton
+
 
 ## Documentation
 
diff --git a/lib/colvars/colvar.cpp b/lib/colvars/colvar.cpp
index d23bd852aaf8be1fe81c41039c9d577af87c7384..ce76b3b9eb2c8bd5f37b85af332611fa7ca963b3 100644
--- a/lib/colvars/colvar.cpp
+++ b/lib/colvars/colvar.cpp
@@ -1008,6 +1008,8 @@ int colvar::calc()
 
 int colvar::calc_cvcs(int first_cvc, size_t num_cvcs)
 {
+  colvarproxy *proxy = cvm::main()->proxy;
+
   int error_code = COLVARS_OK;
   if (cvm::debug())
     cvm::log("Calculating colvar \""+this->name+"\", components "+
@@ -1018,14 +1020,18 @@ int colvar::calc_cvcs(int first_cvc, size_t num_cvcs)
     return error_code;
   }
 
-  if (cvm::step_relative() > 0) {
-    // Total force depends on Jacobian derivative from previous timestep
+  if ((cvm::step_relative() > 0) && (!proxy->total_forces_same_step())){
+    // Use Jacobian derivative from previous timestep
     error_code |= calc_cvc_total_force(first_cvc, num_cvcs);
   }
   // atom coordinates are updated by the next line
   error_code |= calc_cvc_values(first_cvc, num_cvcs);
   error_code |= calc_cvc_gradients(first_cvc, num_cvcs);
   error_code |= calc_cvc_Jacobians(first_cvc, num_cvcs);
+  if (proxy->total_forces_same_step()){
+    // Use Jacobian derivative from this timestep
+    error_code |= calc_cvc_total_force(first_cvc, num_cvcs);
+  }
 
   if (cvm::debug())
     cvm::log("Done calculating colvar \""+this->name+"\".\n");
@@ -1043,6 +1049,7 @@ int colvar::collect_cvc_data()
 
   if (cvm::step_relative() > 0) {
     // Total force depends on Jacobian derivative from previous timestep
+    // collect_cvc_total_forces() uses the previous value of jd
     error_code |= collect_cvc_total_forces();
   }
   error_code |= collect_cvc_values();
@@ -1138,7 +1145,7 @@ int colvar::collect_cvc_values()
       if (!cvcs[i]->is_enabled()) continue;
       x += (cvcs[i])->sup_coeff *
       ( ((cvcs[i])->sup_np != 1) ?
-        std::pow((cvcs[i])->value().real_value, (cvcs[i])->sup_np) :
+        cvm::integer_power((cvcs[i])->value().real_value, (cvcs[i])->sup_np) :
         (cvcs[i])->value().real_value );
     }
   } else {
@@ -1219,7 +1226,7 @@ int colvar::collect_cvc_gradients()
       if (!cvcs[i]->is_enabled()) continue;
       // Coefficient: d(a * x^n) = a * n * x^(n-1) * dx
       cvm::real coeff = (cvcs[i])->sup_coeff * cvm::real((cvcs[i])->sup_np) *
-        std::pow((cvcs[i])->value().real_value, (cvcs[i])->sup_np-1);
+        cvm::integer_power((cvcs[i])->value().real_value, (cvcs[i])->sup_np-1);
 
       for (size_t j = 0; j < cvcs[i]->atom_groups.size(); j++) {
 
@@ -1471,9 +1478,15 @@ cvm::real colvar::update_forces_energy()
     // Coupling force is a slow force, to be applied to atomic coords impulse-style
     f *= cvm::real(time_step_factor);
 
-    // The total force acting on the extended variable is f_ext
-    // This will be used in the next timestep
-    ft_reported = f_ext;
+    if (is_enabled(f_cv_subtract_applied_force)) {
+      // Report a "system" force without the biases on this colvar
+      // that is, just the spring force
+      ft_reported = (-0.5 * ext_force_k) * this->dist2_lgrad(xr, x);
+    } else {
+      // The total force acting on the extended variable is f_ext
+      // This will be used in the next timestep
+      ft_reported = f_ext;
+    }
 
     // leapfrog: starting from x_i, f_i, v_(i-1/2)
     vr  += (0.5 * dt) * f_ext / ext_mass;
@@ -1580,9 +1593,9 @@ void colvar::communicate_forces()
     for (i = 0; i < cvcs.size(); i++) {
       if (!cvcs[i]->is_enabled()) continue;
       (cvcs[i])->apply_force(f * (cvcs[i])->sup_coeff *
-                              cvm::real((cvcs[i])->sup_np) *
-                              (std::pow((cvcs[i])->value().real_value,
-                                      (cvcs[i])->sup_np-1)) );
+                             cvm::real((cvcs[i])->sup_np) *
+                             (cvm::integer_power((cvcs[i])->value().real_value,
+                                                 (cvcs[i])->sup_np-1)) );
     }
 
   } else {
diff --git a/lib/colvars/colvar.h b/lib/colvars/colvar.h
index dfa9e093a537b797284909e2d7a468571cb8cb98..20dad2771b6c25afef9c4667ca23f24cfe61d46d 100644
--- a/lib/colvars/colvar.h
+++ b/lib/colvars/colvar.h
@@ -60,7 +60,10 @@ public:
 
   /// \brief Current actual value (not extended DOF)
   colvarvalue const & actual_value() const;
-
+  
+  /// \brief Force constant of the spring
+  cvm::real const & force_constant() const;
+   
   /// \brief Current velocity (previously set by calc() or by read_traj())
   colvarvalue const & velocity() const;
 
@@ -96,6 +99,12 @@ public:
   {
     return cv_features;
   }
+  /// Free every feature object held in cv_features, then empty the list
+  static void delete_features() {
+    for (size_t k = 0; k < cv_features.size(); ++k)
+      delete cv_features[k];
+    cv_features.clear();
+  }
 
   /// Implements possible actions to be carried out
   /// when a given feature is enabled
@@ -592,6 +601,10 @@ public:
   }
 };
 
+inline cvm::real const & colvar::force_constant() const
+{
+  return ext_force_k;
+}
 
 inline colvarvalue const & colvar::value() const
 {
diff --git a/lib/colvars/colvar_UIestimator.h b/lib/colvars/colvar_UIestimator.h
new file mode 100644
index 0000000000000000000000000000000000000000..7fc7f870a10932d176525bb3c6c42d574a572fb1
--- /dev/null
+++ b/lib/colvars/colvar_UIestimator.h
@@ -0,0 +1,736 @@
+// -*- c++ -*-
+
+// This file is part of the Collective Variables module (Colvars).
+// The original version of Colvars and its updates are located at:
+// https://github.com/colvars/colvars
+// Please update all Colvars source files before making any changes.
+// If you wish to distribute your changes, please submit them to the
+// Colvars repository at GitHub.
+
+#ifndef COLVAR_UIESTIMATOR_H
+#define COLVAR_UIESTIMATOR_H
+
+#include <cmath>
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <string>
+
+#include <typeinfo>
+
+// only for colvar module!
+// when integrated into other code, remove this line and replace every "cvm::..." call (backup_file, proxy streams, boltzmann)
+#include "colvarmodule.h"
+
+namespace UIestimator {
+    const int Y_SIZE = 21;            // number of extended-CV (y) bins tracked around each CV (x) bin, per dimension
+                                      // For example, CV=10, width=1, Y_SIZE=21, then eCV=[0-20], having a size of 21
+    const int HALF_Y_SIZE = (Y_SIZE - 1) / 2;   // half of that window in bins (10 for Y_SIZE=21); derived so the two cannot drift apart
+    const int EXTENDED_X_SIZE = HALF_Y_SIZE;    // number of bins beyond the boundaries in which x is still accepted by update()
+    const double EPSILON = 0.000001;   // for comparison of float numbers; also added before truncating to a bin number
+
+    class n_matrix {   // Stores the distribution matrix of n(x,y): sample counts per (x bin, y bin) pair
+    // Entries are addressed with real coordinates; convert_x()/convert_y() translate them to positions in the internal storage.
+
+    public:
+        n_matrix() : dimension(0), x_total_size(0), y_size(0), y_total_size(0) {}   // empty placeholder; scalars are zeroed so that copying it is well-defined
+        // Build a zero-filled matrix: one row per x bin in [lowerboundary, upperboundary], y_size^dimension columns per row
+        n_matrix(const std::vector<double> & lowerboundary,   // lower boundary of x, one entry per dimension
+            const std::vector<double> & upperboundary,   // upper boundary of x
+            const std::vector<double> & width,           // bin width of x
+            const int y_size) {          // size of y, for example, ysize=7, then when x=1, the distribution of y in [-2,4] is considered
+
+            int i;
+
+            this->lowerboundary = lowerboundary;
+            this->upperboundary = upperboundary;
+            this->width = width;
+            this->dimension = lowerboundary.size();
+            this->y_size = y_size;     // only a window of y_size bins around each x bin is stored, per dimension
+            this->y_total_size = int(std::pow(double(y_size), dimension) + EPSILON);
+
+            // the range of the matrix is [lowerboundary, upperboundary]
+            x_total_size = 1;
+            for (i = 0; i < dimension; i++) {
+                x_size.push_back(int((upperboundary[i] - lowerboundary[i]) / width[i] + EPSILON));
+                x_total_size *= x_size[i];
+            }
+
+            // initialize the internal matrix
+            matrix.reserve(x_total_size);
+            for (i = 0; i < x_total_size; i++) {
+                matrix.push_back(std::vector<int>(y_total_size, 0));
+            }
+
+            temp.resize(dimension);
+        }
+
+        // counter stored for the (x, y) pair
+        int inline get_value(const std::vector<double> & x, const std::vector<double> & y) {
+            return matrix[convert_x(x)][convert_y(x, y)];
+        }
+
+        // overwrite the counter of the (x, y) pair
+        void inline set_value(const std::vector<double> & x, const std::vector<double> & y, const int value) {
+            matrix[convert_x(x)][convert_y(x,y)] = value;
+        }
+
+        // add value to the counter of the (x, y) pair
+        void inline increase_value(const std::vector<double> & x, const std::vector<double> & y, const int value) {
+            matrix[convert_x(x)][convert_y(x,y)] += value;
+        }
+
+    private:
+        std::vector<double> lowerboundary;
+        std::vector<double> upperboundary;
+        std::vector<double> width;
+        int dimension;
+        std::vector<int> x_size;       // the size of x in each dimension
+        int x_total_size;              // the size of x of the internal matrix
+        int y_size;                    // the size of y in each dimension
+        int y_total_size;              // the size of y of the internal matrix
+
+        std::vector<std::vector<int> > matrix;  // the internal matrix
+
+        // scratch space for the per-dimension bin numbers; overwritten by every convert_x()/convert_y() call
+        std::vector<int> temp;
+
+        // Convert real x coordinates to the row index of the internal matrix.
+        // Each coordinate is binned as int((x - lowerboundary) / width + EPSILON); the bins are then
+        // combined in row-major order, with the last dimension varying fastest.
+        int convert_x(const std::vector<double> & x) {
+            for (int d = 0; d < dimension; d++) {
+                temp[d] = int((x[d] - lowerboundary[d]) / width[d] + EPSILON);
+            }
+
+            // Horner-style accumulation, equal to sum_d temp[d] * prod_{e>d} x_size[e]
+            int index = 0;
+            for (int d = 0; d < dimension; d++) {
+                index = index * x_size[d] + temp[d];
+            }
+            return index;
+        }
+
+        // Convert real y coordinates to the column index within the row selected by x.
+        // Per dimension, the offset of y from x in bins is shifted by (y_size - 1) / 2, so that offsets
+        // inside the stored window map to 0 .. y_size - 1; they are then read as digits in base y_size.
+        int convert_y(const std::vector<double> & x, const std::vector<double> & y) {
+            for (int d = 0; d < dimension; d++) {
+                temp[d] = int(round((round(y[d] / width[d] + EPSILON) - round(x[d] / width[d] + EPSILON)) + (y_size - 1) / 2 + EPSILON));
+            }
+
+            // same value as sum_d temp[d] * y_size^(dimension - d - 1), without calling pow()
+            int index = 0;
+            for (int d = 0; d < dimension; d++) {
+                index = index * y_size + temp[d];
+            }
+            return index;
+        }
+
+        // round half away from zero
+        double round(double r) {
+            return (r > 0.0) ? std::floor(r + 0.5) : std::ceil(r - 0.5);
+        }
+    };
+
+    // Grid of values of type T addressed by real coordinates (used for sum_x, sum_x_square, count_y, ...)
+    // Coordinates are mapped to bins by convert_x(); no bounds checking is performed.
+    template <typename T>
+    class n_vector {
+
+    public:
+        n_vector() : dimension(0), x_total_size(0) {}   // empty placeholder; scalars are zeroed so that copying it is well-defined
+        // Build a grid filled with default_value, widened by (y_size - 1) / 2 bins beyond each boundary
+        n_vector(const std::vector<double> & lowerboundary,   // lower boundary of x, one entry per dimension
+            const std::vector<double> & upperboundary,   // upper boundary of x
+            const std::vector<double> & width,                // bin width of x
+            const int y_size,           // size of y, for example, ysize=7, then when x=1, the distribution of y in [-2,4] is considered
+            const T & default_value) {         //   the default value of T
+
+            this->width = width;
+            this->dimension = lowerboundary.size();
+
+            x_total_size = 1;
+            for (int i = 0; i < dimension; i++) {
+                this->lowerboundary.push_back(lowerboundary[i] - (y_size - 1) / 2 * width[i] - EPSILON);
+                this->upperboundary.push_back(upperboundary[i] + (y_size - 1) / 2 * width[i] + EPSILON);
+
+                x_size.push_back(int((this->upperboundary[i] - this->lowerboundary[i]) / this->width[i] + EPSILON));
+                x_total_size *= x_size[i];
+            }
+
+            // initialize the internal vector
+            values.resize(x_total_size, default_value);
+
+            temp.resize(dimension);
+        }
+
+        // value stored for the bin containing x; returned by reference so that a T such as
+        // std::vector<double> is not copied on every lookup (valid until the grid is reassigned)
+        inline const T & get_value(const std::vector<double> & x) {
+            return values[convert_x(x)];
+        }
+
+        // overwrite the value of the bin containing x
+        void inline set_value(const std::vector<double> & x, const T value) {
+            values[convert_x(x)] = value;
+        }
+
+        // add value to the bin containing x (uses T's operator+=)
+        void inline increase_value(const std::vector<double> & x, const T value) {
+            values[convert_x(x)] += value;
+        }
+
+    private:
+        std::vector<double> lowerboundary;
+        std::vector<double> upperboundary;
+        std::vector<double> width;
+        int dimension;
+        std::vector<int> x_size;       // the size of x in each dimension
+        int x_total_size;              // the size of x of the internal matrix
+
+        std::vector<T> values;  // the internal vector
+
+        // scratch space for the per-dimension bin numbers; overwritten by every convert_x() call
+        std::vector<int> temp;
+
+        // Convert real x coordinates to the flat index of the internal vector (row-major, last dimension fastest)
+        int convert_x(const std::vector<double> & x) {
+            for (int d = 0; d < dimension; d++) {
+                temp[d] = int((x[d] - lowerboundary[d]) / width[d] + EPSILON);
+            }
+
+            // Horner-style accumulation, equal to sum_d temp[d] * prod_{e>d} x_size[e]
+            int index = 0;
+            for (int d = 0; d < dimension; d++) {
+                index = index * x_size[d] + temp[d];
+            }
+            return index;
+        }
+    };
+
+    class UIestimator {     // the implemension of UI estimator
+
+    public:
+        UIestimator() {}   // placeholder object: the body sets nothing, so output_freq, dimension, etc. stay unset until a fully constructed object is assigned
+
+        // called when (re)starting an eABF simulation: stores the settings, allocates every grid and, on restart, loads the previous data
+        UIestimator(const std::vector<double> & lowerboundary,
+            const std::vector<double> & upperboundary,
+            const std::vector<double> & width,
+            const std::vector<double> & krestr,                // force constant in eABF
+            const std::string & output_filename,              // the prefix of output files
+            const int output_freq,
+            const bool restart,                              // whether restart from a .count and a .grad file
+            const std::vector<std::string> & input_filename,   // the prefixes of input files
+            const double temperature) {
+
+            // initialize variables
+            this->lowerboundary = lowerboundary;
+            this->upperboundary = upperboundary;
+            this->width = width;
+            this->krestr = krestr;
+            this->output_filename = output_filename;
+            this->output_freq = output_freq;
+            this->restart = restart;
+            this->input_filename = input_filename;
+            this->temperature = temperature;
+
+            int i;
+
+            dimension = lowerboundary.size();
+
+            for (i = 0; i < dimension; i++) {
+                sum_x.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0));
+                sum_x_square.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0));
+
+                x_av.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0));
+                sigma_square.push_back(n_vector<double>(lowerboundary, upperboundary, width, Y_SIZE, 0.0));
+            }
+
+            count_y = n_vector<int>(lowerboundary, upperboundary, width, Y_SIZE, 0);
+            distribution_x_y = n_matrix(lowerboundary, upperboundary, width, Y_SIZE);
+
+            grad = n_vector<std::vector<double> >(lowerboundary, upperboundary, width, 1, std::vector<double>(dimension, 0.0));
+            count = n_vector<int>(lowerboundary, upperboundary, width, 1, 0);
+
+            written = false;
+            written_1D = false;
+
+            if (dimension == 1) {
+                std::vector<double> upperboundary_temp = upperboundary;
+                upperboundary_temp[0] = upperboundary[0] + width[0];   // one extra bin: the 1D PMF is also evaluated at the upper boundary
+                oneD_pmf = n_vector<double>(lowerboundary, upperboundary_temp, width, 1, 0.0);
+            }
+
+            if (restart == true) {
+                input_grad = n_vector<std::vector<double> >(lowerboundary, upperboundary, width, 1, std::vector<double>(dimension, 0.0));
+                input_count = n_vector<int>(lowerboundary, upperboundary, width, 1, 0);
+
+                // initialize input_grad and input_count
+                // loop_flag is an n-dimensional position, increased from lowerboundary to upperboundary while looping
+                std::vector<double> loop_flag(dimension, 0);
+                for (i = 0; i < dimension; i++) {
+                    loop_flag[i] = lowerboundary[i];
+                }
+
+                i = 0;
+                while (i >= 0) {
+                    // one write per grid point is enough: the stored value does not depend
+                    // on the dimension index, so no inner loop over dimensions is needed
+                    input_grad.set_value(loop_flag, std::vector<double>(dimension,0));
+                    input_count.set_value(loop_flag, 0);
+
+                    // advance loop_flag like an odometer; i drops below 0 once every grid point has been visited
+                    i = dimension - 1;
+                    while (i >= 0) {
+                        loop_flag[i] += width[i];
+                        if (loop_flag[i] > upperboundary[i] - width[i] + EPSILON) {
+                            loop_flag[i] = lowerboundary[i];
+                            i--;
+                        }
+                        else
+                            break;
+                    }
+                }
+                read_inputfiles(input_filename);
+            }
+        }
+
+        ~UIestimator() {}   // nothing to release by hand: the data members are containers, strings and scalars
+
+        // called from the MD engine every step with the CV values x and the extended-CV values y;
+        // returns true when the sample was added to distribution_x_y, false when it was rejected
+        bool update(const int step, std::vector<double> x, std::vector<double> y) {
+            int i;
+
+            if (output_freq > 0 && step % output_freq == 0) {   // a non-positive output_freq disables output instead of dividing by zero
+                calc_pmf();
+                write_files();
+                //write_interal_data();
+            }
+
+            for (i = 0; i < dimension; i++) {
+                // for dihedral RC, it is possible that x = 179 and y = -179, should correct it
+                // may have problem, need to fix
+                if (x[i] > 150 && y[i] < -150) {
+                    y[i] += 360;
+                }
+                if (x[i] < -150 && y[i] > 150) {
+                    y[i] -= 360;
+                }
+
+                if (x[i] < lowerboundary[i] - EXTENDED_X_SIZE * width[i] + EPSILON || x[i] > upperboundary[i] + EXTENDED_X_SIZE * width[i] - EPSILON
+                    || y[i] - x[i] < -HALF_Y_SIZE * width[i] + EPSILON || y[i] - x[i] > HALF_Y_SIZE * width[i] - EPSILON
+                    || y[i] - lowerboundary[i] < -HALF_Y_SIZE * width[i] + EPSILON || y[i] - upperboundary[i] > HALF_Y_SIZE * width[i] - EPSILON)
+                    return false;
+            }
+            // the per-y statistics are accumulated even when x then fails the stricter boundary test below
+            for (i = 0; i < dimension; i++) {
+                sum_x[i].increase_value(y, x[i]);
+                sum_x_square[i].increase_value(y, x[i] * x[i]);
+            }
+            count_y.increase_value(y, 1);
+
+            for (i = 0; i < dimension; i++) {
+                // adapt colvars precision: x must lie strictly inside [lowerboundary, upperboundary] to be binned
+                if (x[i] < lowerboundary[i] + EPSILON || x[i] > upperboundary[i] - EPSILON)
+                    return false;
+            }
+            distribution_x_y.increase_value(x, y, 1);
+
+            return true;
+        }
+
+        // replace the prefix from which the names of the output files are built
+        void update_output_filename(const std::string& filename) {
+            this->output_filename.assign(filename);
+        }
+
+    private:
+        std::vector<n_vector<double> > sum_x;                        // per dimension: the sum of x in each y bin
+        std::vector<n_vector<double> > sum_x_square;                 // per dimension: the sum of x*x in each y bin
+        n_vector<int> count_y;                              // the distribution of y (number of samples per y bin)
+        n_matrix distribution_x_y;   // the distribution of <x, y> pair
+
+        int dimension;   // number of entries of lowerboundary, set by the full constructor
+
+        std::vector<double> lowerboundary;
+        std::vector<double> upperboundary;
+        std::vector<double> width;
+        std::vector<double> krestr;   // force constant in eABF, one per dimension
+        std::string output_filename;   // prefix of the output files
+        int output_freq;   // update() recomputes and writes the results whenever step % output_freq == 0
+        bool restart;   // true when input_grad / input_count were set up from earlier output
+        std::vector<std::string> input_filename;   // prefixes of the input files read on restart
+        double temperature;   // multiplied by cvm::boltzmann() in calc_pmf()
+
+        n_vector<std::vector<double> > grad;   // gradient per x bin, filled by calc_pmf()
+        n_vector<int> count;   // number of samples per x bin, filled by calc_pmf()
+
+        n_vector<double> oneD_pmf;   // integrated gradient; only allocated when dimension == 1
+
+        n_vector<std::vector<double> > input_grad;   // gradient merged from the input files (restart only)
+        n_vector<int> input_count;   // sample counts merged from the input files (restart only)
+
+        // used in double integration: mean and variance of x within each y bin
+        std::vector<n_vector<double> > x_av;
+        std::vector<n_vector<double> > sigma_square;
+
+        bool written;   // set once write_files() has run; later calls back up the files first
+        bool written_1D;   // same flag for write_1D_pmf()
+
+        // calculate gradients (grad) and sample counts (count) on the x grid from the internal variables
+        void calc_pmf() {
+            int norm;
+            int i, j, k;
+            // pass 1: mean (x_av) and variance (sigma_square) of x within every y bin of the extended grid
+            std::vector<double> loop_flag(dimension, 0);
+            for (i = 0; i < dimension; i++) {
+                loop_flag[i] = lowerboundary[i] - HALF_Y_SIZE * width[i];
+            }
+
+            i = 0;
+            while (i >= 0) {
+                norm = count_y.get_value(loop_flag) > 0 ? count_y.get_value(loop_flag) : 1;   // 1 for empty bins, to avoid dividing by zero
+                for (j = 0; j < dimension; j++) {
+                    x_av[j].set_value(loop_flag, sum_x[j].get_value(loop_flag) / norm);
+                    sigma_square[j].set_value(loop_flag, sum_x_square[j].get_value(loop_flag) / norm - x_av[j].get_value(loop_flag) * x_av[j].get_value(loop_flag));
+                }
+
+                // advance loop_flag like an odometer; i drops below 0 once every y bin has been visited
+                i = dimension - 1;
+                while (i >= 0) {
+                    loop_flag[i] += width[i];
+                    if (loop_flag[i] > upperboundary[i] + HALF_Y_SIZE * width[i] - width[i] + EPSILON) {
+                        loop_flag[i] = lowerboundary[i] - HALF_Y_SIZE * width[i];
+                        i--;
+                    }
+                    else
+                        break;
+                }
+            }
+
+            // double integration (pass 2): for every x bin, accumulate over the y bins around it
+            std::vector<double> av(dimension, 0);
+            std::vector<double> diff_av(dimension, 0);
+
+            std::vector<double> loop_flag_x(dimension, 0);
+            std::vector<double> loop_flag_y(dimension, 0);
+            for (i = 0; i < dimension; i++) {
+                loop_flag_x[i] = lowerboundary[i];
+                loop_flag_y[i] = loop_flag_x[i] - HALF_Y_SIZE * width[i];
+            }
+
+            i = 0;
+            while (i >= 0) {
+                norm = 0;
+                for (k = 0; k < dimension; k++) {
+                    av[k] = 0;
+                    diff_av[k] = 0;
+                    loop_flag_y[k] = loop_flag_x[k] - HALF_Y_SIZE * width[k];
+                }
+
+                int j = 0;   // shadows the outer j for the y loop
+                while (j >= 0) {
+                    norm += distribution_x_y.get_value(loop_flag_x, loop_flag_y);
+                    for (k = 0; k < dimension; k++) {
+                        if (sigma_square[k].get_value(loop_flag_y) > EPSILON || sigma_square[k].get_value(loop_flag_y) < -EPSILON)
+                            av[k] += distribution_x_y.get_value(loop_flag_x, loop_flag_y) * ( (loop_flag_x[k] + 0.5 * width[k]) - x_av[k].get_value(loop_flag_y)) / sigma_square[k].get_value(loop_flag_y);
+                        // the term above is skipped when the variance is (numerically) zero; the one below is always added
+                        diff_av[k] += distribution_x_y.get_value(loop_flag_x, loop_flag_y) * (loop_flag_x[k] - loop_flag_y[k]);
+                    }
+
+                    // advance loop_flag_y over the window that starts HALF_Y_SIZE bins below x
+                    j = dimension - 1;
+                    while (j >= 0) {
+                        loop_flag_y[j] += width[j];
+                        if (loop_flag_y[j] > loop_flag_x[j] + HALF_Y_SIZE * width[j] - width[j] + EPSILON) {
+                            loop_flag_y[j] = loop_flag_x[j] - HALF_Y_SIZE * width[j];
+                            j--;
+                        }
+                        else
+                            break;
+                    }
+                }
+
+                std::vector<double> grad_temp(dimension, 0);
+                for (k = 0; k < dimension; k++) {
+                    diff_av[k] /= (norm > 0 ? norm : 1);
+                    av[k] = cvm::boltzmann() * temperature * av[k] / (norm > 0 ? norm : 1);
+                    grad_temp[k] = av[k] - krestr[k] * diff_av[k];
+                }
+                grad.set_value(loop_flag_x, grad_temp);
+                count.set_value(loop_flag_x, norm);
+
+                // advance loop_flag_x to the next x bin; i drops below 0 after the last one
+                i = dimension - 1;
+                while (i >= 0) {
+                    loop_flag_x[i] += width[i];
+                    if (loop_flag_x[i] > upperboundary[i] - width[i] + EPSILON) {
+                        loop_flag_x[i] = lowerboundary[i];
+                        i--;
+                    }
+                    else
+                        break;
+                }
+            }
+        }
+
+
+        // Integrate the 1D gradient bin by bin into oneD_pmf, then shift the result so that its minimum is zero
+        void calc_1D_pmf()
+        {
+            std::vector<double> prev(lowerboundary);   // bin whose gradient is integrated next
+            std::vector<double> cur(1, 0);             // bin receiving the running sum
+            double lowest = 0;                         // smallest value seen so far (the first bin holds 0)
+
+            oneD_pmf.set_value(lowerboundary, 0);
+            for (double xi = lowerboundary[0] + width[0]; xi < upperboundary[0] + EPSILON; xi += width[0]) {
+                cur[0] = xi + EPSILON;
+                // input_count is only consulted on restart; the constructor sets it up in that case alone
+                const int n_old = (restart == false) ? 0 : input_count.get_value(prev);
+                double slope = grad.get_value(prev)[0];
+                if (n_old != 0) {
+                    // count-weighted average of the current and the restart gradient
+                    const int n_now = count.get_value(prev);
+                    slope = (slope * n_now + input_grad.get_value(prev)[0] * n_old) / (n_now + n_old);
+                }
+                const double running = oneD_pmf.get_value(prev) + slope * width[0];
+                if (running < lowest) {
+                    lowest = running;
+                }
+                oneD_pmf.set_value(cur, running);
+                prev[0] = xi + EPSILON;
+            }
+
+            for (double xi = lowerboundary[0]; xi < upperboundary[0] + EPSILON; xi += width[0]) {
+                cur[0] = xi + EPSILON;
+                oneD_pmf.set_value(cur, oneD_pmf.get_value(cur) - lowest);
+            }
+        }
+
+        // Write oneD_pmf to "<output_filename>.UI.pmf": one "<x> <value>" line per grid position
+        void write_1D_pmf() {
+            const std::string fname(output_filename + ".UI.pmf");
+
+            // only for colvars module!
+            if (written_1D) {
+                cvm::backup_file(fname.c_str());
+            }
+            std::ostream* pmf_stream = cvm::proxy->output_stream(fname.c_str());
+
+            std::vector<double> bin(1, 0);
+            for (double xi = lowerboundary[0]; xi < upperboundary[0] + EPSILON; xi += width[0]) {
+                bin[0] = xi + EPSILON;
+                *pmf_stream << xi << " " << oneD_pmf.get_value(bin) << std::endl;
+            }
+            cvm::proxy->close_output_stream(fname.c_str());
+
+            written_1D = true;   // later calls pass the file to cvm::backup_file() before rewriting it
+        }
+
+        void writehead(std::ostream& os) const {   // file header: "# <dimension>", then "# <lower> <width> <bins> 0" per dimension, then an empty line
+            os << "# " << dimension << std::endl;
+            for (int d = 0; d < dimension; ++d) {
+                const int nbins = int((upperboundary[d] - lowerboundary[d]) / width[d] + EPSILON);
+                os << "# " << lowerboundary[d] << " " << width[d] << " " << nbins << " " << 0 << std::endl;
+            }
+            os << std::endl;
+        }
+
+        // write internal data to "<output_filename>.UI.internal", used for testing
+        void write_interal_data() {
+            std::string internal_filename = output_filename + ".UI.internal";
+
+            std::ostream* ofile_internal = cvm::proxy->output_stream(internal_filename.c_str());
+
+            std::vector<double> loop_flag(dimension, 0);
+            for (int i = 0; i < dimension; i++) {
+                loop_flag[i] = lowerboundary[i];
+            }
+
+            int n = 0;
+            while (n >= 0) {
+                for (int j = 0; j < dimension; j++) {
+                    *ofile_internal << loop_flag[j] + 0.5 * width[j] << " ";   // bin centre
+                }
+
+                for (int k = 0; k < dimension; k++) {
+                    *ofile_internal << grad.get_value(loop_flag)[k] << " ";
+                }
+                // NOTE(review): the dump below indexes element [1] and hard-codes a range of +/-10, so it assumes dimension >= 2
+                std::vector<double> ii(dimension,0);
+                for (double i = loop_flag[0] - 10; i < loop_flag[0] + 10 + EPSILON; i+= width[0]) {
+                    for (double j = loop_flag[1] - 10; j< loop_flag[1] + 10 + EPSILON; j+=width[1]) {
+                        ii[0] = i;
+                        ii[1] = j;
+                        *ofile_internal << i <<" "<<j<<" "<< distribution_x_y.get_value(loop_flag,ii)<< " ";
+                    }
+                }
+                *ofile_internal << std::endl;
+
+                // advance loop_flag like an odometer; n drops below 0 once every x bin has been written
+                n = dimension - 1;
+                while (n >= 0) {
+                    loop_flag[n] += width[n];
+                    if (loop_flag[n] > upperboundary[n] - width[n] + EPSILON) {
+                        loop_flag[n] = lowerboundary[n];
+                        n--;
+                    }
+                    else
+                        break;
+                }
+            }
+            cvm::proxy->close_output_stream(internal_filename.c_str());
+        }
+
+        // write output files: "<prefix>.UI.grad", "<prefix>.UI.hist.grad" and "<prefix>.UI.count" (plus the 1D PMF when dimension == 1)
+        void write_files() {
+            std::string grad_filename = output_filename + ".UI.grad";
+            std::string hist_filename = output_filename + ".UI.hist.grad";
+            std::string count_filename = output_filename + ".UI.count";
+
+            int i, j;
+            // files already written by this object are handed to cvm::backup_file() before being replaced
+            // only for colvars module!
+            if (written) cvm::backup_file(grad_filename.c_str());
+            //if (written) cvm::backup_file(hist_filename.c_str());
+            if (written) cvm::backup_file(count_filename.c_str());
+
+            std::ostream* ofile = cvm::proxy->output_stream(grad_filename.c_str());
+            std::ostream* ofile_hist = cvm::proxy->output_stream(hist_filename.c_str(), std::ios::app);   // opened with std::ios::app: every call adds a new header + data block
+            std::ostream* ofile_count = cvm::proxy->output_stream(count_filename.c_str());
+
+            writehead(*ofile);
+            writehead(*ofile_hist);
+            writehead(*ofile_count);
+
+            if (dimension == 1) {
+                calc_1D_pmf();
+                write_1D_pmf();
+            }
+
+            std::vector<double> loop_flag(dimension, 0);
+            for (i = 0; i < dimension; i++) {
+                loop_flag[i] = lowerboundary[i];
+            }
+
+            i = 0;
+            while (i >= 0) {
+                for (j = 0; j < dimension; j++) {
+                    *ofile << loop_flag[j] + 0.5 * width[j] << " ";   // every row starts with the bin centre
+                    *ofile_hist << loop_flag[j] + 0.5 * width[j] << " ";
+                    *ofile_count << loop_flag[j] + 0.5 * width[j] << " ";
+                }
+
+                if (restart == false) {
+                    for (j = 0; j < dimension; j++) {
+                        *ofile << grad.get_value(loop_flag)[j] << " ";
+                        *ofile_hist << grad.get_value(loop_flag)[j] << " ";
+                    }
+                    *ofile << std::endl;
+                    *ofile_hist << std::endl;
+                    *ofile_count << count.get_value(loop_flag) << " " <<std::endl;
+                }
+                else {
+                    double final_grad = 0;
+                    for (j = 0; j < dimension; j++) {
+                        int total_count_temp = (count.get_value(loop_flag) + input_count.get_value(loop_flag));
+                        if (input_count.get_value(loop_flag) == 0)
+                            final_grad = grad.get_value(loop_flag)[j];
+                        else   // count-weighted average of the current gradient and the one read from the input files
+                            final_grad = ((grad.get_value(loop_flag)[j] * count.get_value(loop_flag) + input_grad.get_value(loop_flag)[j] * input_count.get_value(loop_flag)) / total_count_temp);
+                        *ofile << final_grad << " ";
+                        *ofile_hist << final_grad << " ";
+                    }
+                    *ofile << std::endl;
+                    *ofile_hist << std::endl;
+                    *ofile_count << (count.get_value(loop_flag) + input_count.get_value(loop_flag)) << " " <<std::endl;
+                }
+
+                // advance loop_flag like an odometer; an empty line is written each time a dimension wraps around
+                i = dimension - 1;
+                while (i >= 0) {
+                    loop_flag[i] += width[i];
+                    if (loop_flag[i] > upperboundary[i] - width[i] + EPSILON) {
+                        loop_flag[i] = lowerboundary[i];
+                        i--;
+                        *ofile << std::endl;
+                        *ofile_hist << std::endl;
+                        *ofile_count << std::endl;
+                    }
+                    else
+                        break;
+                }
+            }
+            cvm::proxy->close_output_stream(grad_filename.c_str());
+            cvm::proxy->close_output_stream(hist_filename.c_str());
+            cvm::proxy->close_output_stream(count_filename.c_str());
+
+            written = true;
+        }
+
+        // read input files: merge the ".UI.count" / ".UI.grad" data of every prefix into input_count / input_grad
+        void read_inputfiles(const std::vector<std::string> input_filename)
+        {
+            char sharp;
+            double nothing;
+            int dimension_temp;   // dimension announced by the file header; read but not checked
+            int i, j, k;
+
+            std::vector<double> position_temp(dimension, 0);
+            std::vector<double> grad_temp(dimension, 0);
+            int count_temp = 0;
+            for (i = 0; i < int(input_filename.size()); i++) {
+                int size = 1 , size_temp = 0;
+                std::string count_filename = input_filename[i] + ".UI.count";
+                std::string grad_filename = input_filename[i] + ".UI.grad";
+
+                std::ifstream count_file(count_filename.c_str(), std::ios::in);
+                std::ifstream grad_file(grad_filename.c_str(), std::ios::in);
+
+                count_file >> sharp >> dimension_temp;
+                grad_file >> sharp >> dimension_temp;
+
+                for (j = 0; j < dimension; j++) {
+                    count_file >> sharp >> nothing >> nothing >> size_temp >> nothing;
+                    grad_file >> sharp >> nothing >> nothing >> nothing >> nothing;
+                    size *= size_temp;
+                }
+
+                for (j = 0; j < size; j++) {
+                    bool in_range;
+                    do {
+                        in_range = true;
+                        for (k = 0; k < dimension; k++) {
+                            count_file >> position_temp[k];
+                            grad_file >> nothing;
+                            // check every coordinate against its own boundaries (index k, not the file index i)
+                            if (position_temp[k] < lowerboundary[k] - EPSILON || position_temp[k] > upperboundary[k] + EPSILON)
+                                in_range = false;
+                        }
+                        for (k = 0; k < dimension; k++) {
+                            grad_file >> grad_temp[k];
+                        }
+                        count_file >> count_temp;
+                    }
+                    while (!in_range && count_file && grad_file);
+                    if (!count_file || !grad_file) break;   // truncated or unreadable file: do not merge stale values
+                    if (count_temp == 0) continue;
+
+                    for (k = 0; k < dimension; k++) {
+                        grad_temp[k] = (grad_temp[k] * count_temp + input_grad.get_value(position_temp)[k] * input_count.get_value(position_temp)) / (count_temp + input_count.get_value(position_temp));
+                    }
+                    input_grad.set_value(position_temp, grad_temp);
+                    input_count.increase_value(position_temp, count_temp);
+                }
+
+                count_file.close();
+                grad_file.close();
+            }
+        }
+    };
+};
+
+#endif
diff --git a/lib/colvars/colvaratoms.cpp b/lib/colvars/colvaratoms.cpp
index 9b4a922e3fbb2322455c17adca9194d2032ab6a1..d2a0f0a807d2727500d96e6a19a56eaa0af23f39 100644
--- a/lib/colvars/colvaratoms.cpp
+++ b/lib/colvars/colvaratoms.cpp
@@ -817,6 +817,18 @@ int cvm::atom_group::create_sorted_ids(void)
 }
 
 
+int cvm::atom_group::overlap(const atom_group &g1, const atom_group &g2){
+  // pairwise scan of both groups; stops at the first atom id found in both
+  for (cvm::atom_const_iter a = g1.begin(); a != g1.end(); ++a) {
+    for (cvm::atom_const_iter b = g2.begin(); b != g2.end(); ++b) {
+      if (a->id != b->id) continue;
+      return (a->id + 1); // shifted by one so that 0 can mean "no common atom"
+    }
+  }
+  return 0;
+}
+
+
 void cvm::atom_group::center_ref_pos()
 {
   ref_pos_cog = cvm::atom_pos(0.0, 0.0, 0.0);
diff --git a/lib/colvars/colvaratoms.h b/lib/colvars/colvaratoms.h
index 6113fb38a938b37a56d54825c51d43252d5b54db..71c587e23084e516c6b66358e97327c8d404822f 100644
--- a/lib/colvars/colvaratoms.h
+++ b/lib/colvars/colvaratoms.h
@@ -214,6 +214,12 @@ public:
   {
     return ag_features;
   }
+  static void delete_features() {
+    // free each heap-allocated feature in order, then drop the dangling pointers
+    size_t k = 0;
+    while (k < ag_features.size()) delete ag_features[k++];
+    ag_features.clear();
+  }
 
 protected:
 
@@ -280,6 +286,10 @@ public:
   /// Allocates and populates the sorted list of atom ids
   int create_sorted_ids(void);
 
+  /// Detect whether two groups share atoms
+  /// If yes, returns 1-based number of a common atom; else, returns 0
+  static int overlap(const atom_group &g1, const atom_group &g2);
+
   /// \brief When updating atomic coordinates, translate them to align with the
   /// center of mass of the reference coordinates
   bool b_center;
diff --git a/lib/colvars/colvarbias.cpp b/lib/colvars/colvarbias.cpp
index 636727ca39b4f90b8b9686a64b51845857843c13..301e83e73015a91f14ff75a82c045954083d4074 100644
--- a/lib/colvars/colvarbias.cpp
+++ b/lib/colvars/colvarbias.cpp
@@ -10,6 +10,7 @@
 #include "colvarmodule.h"
 #include "colvarvalue.h"
 #include "colvarbias.h"
+#include "colvargrid.h"
 
 
 colvarbias::colvarbias(char const *key)
@@ -31,12 +32,14 @@ int colvarbias::init(std::string const &conf)
 {
   colvarparse::init(conf);
 
+  size_t i = 0;
+
   if (name.size() == 0) {
 
     // first initialization
 
     cvm::log("Initializing a new \""+bias_type+"\" instance.\n");
-    rank = cvm::num_biases_type(bias_type);
+    rank = cvm::main()->num_biases_type(bias_type);
     get_keyval(conf, "name", name, bias_type+cvm::to_str(rank));
 
     {
@@ -62,7 +65,7 @@ int colvarbias::init(std::string const &conf)
                      INPUT_ERROR);
           return INPUT_ERROR;
         }
-        for (size_t i = 0; i < colvar_names.size(); i++) {
+        for (i = 0; i < colvar_names.size(); i++) {
           add_colvar(colvar_names[i]);
         }
       }
@@ -148,6 +151,13 @@ int colvarbias::clear()
 }
 
 
+int colvarbias::clear_state_data()
+{
+  // The base class holds no deletable state data; subclasses override this
+  return COLVARS_OK;
+}
+
+
 int colvarbias::add_colvar(std::string const &cv_name)
 {
   if (colvar *cv = cvm::colvar_by_name(cv_name)) {
@@ -164,6 +174,8 @@ int colvarbias::add_colvar(std::string const &cv_name)
     colvar_forces.back().is_derivative(); // colvar constraints are not applied to the force
     colvar_forces.back().reset();
 
+    previous_colvar_forces.push_back(colvar_forces.back());
+
     cv->biases.push_back(this); // add back-reference to this bias to colvar
 
     if (is_enabled(f_cvb_apply_force)) {
@@ -204,7 +216,8 @@ int colvarbias::update()
 
 void colvarbias::communicate_forces()
 {
-  for (size_t i = 0; i < num_variables(); i++) {
+  size_t i = 0;
+  for (i = 0; i < num_variables(); i++) {
     if (cvm::debug()) {
       cvm::log("Communicating a force to colvar \""+
                variables(i)->name+"\".\n");
@@ -216,6 +229,9 @@ void colvarbias::communicate_forces()
     // aware of this bias' time_step_factor
     variables(i)->add_bias_force(cvm::real(time_step_factor) * colvar_forces[i]);
   }
+  for (i = 0; i < num_variables(); i++) {
+    previous_colvar_forces[i] = colvar_forces[i];
+  }
 }
 
 
@@ -389,6 +405,259 @@ std::ostream & colvarbias::write_traj(std::ostream &os)
   return os;
 }
 
+
+
+/// Construct the TI estimator with no grids allocated; they are created
+/// later by init_grids()
+colvarbias_ti::colvarbias_ti(char const *key)
+  : colvarbias(key), ti_avg_forces(NULL), ti_count(NULL)
+{
+  provide(f_cvb_calc_ti_samples);
+}
+
+/// Free the TI grids; the qualified call bypasses virtual dispatch
+colvarbias_ti::~colvarbias_ti()
+{
+  colvarbias_ti::clear_state_data();
+}
+
+
+// Free both TI grids and reset their pointers (a no-op if never allocated)
+int colvarbias_ti::clear_state_data()
+{
+  // "delete" accepts NULL, so explicit guards are not required
+  delete ti_avg_forces;
+  ti_avg_forces = NULL;
+
+  delete ti_count;
+  ti_count = NULL;
+
+  return COLVARS_OK;
+}
+
+
+int colvarbias_ti::init(std::string const &conf)
+{
+  int error_code = COLVARS_OK;
+  // Parse the user keywords that select which TI data are written
+  get_keyval_feature(this, conf, "writeTISamples",
+                     f_cvb_write_ti_samples,
+                     is_enabled(f_cvb_write_ti_samples));
+  get_keyval_feature(this, conf, "writeTIPMF",
+                     f_cvb_write_ti_pmf,
+                     is_enabled(f_cvb_write_ti_pmf));
+
+  if ((num_variables() > 1) && is_enabled(f_cvb_write_ti_pmf)) {
+    return cvm::error("Error: only 1-dimensional PMFs can be written "
+                      "on the fly.\n"
+                      "Consider using writeTISamples instead and "
+                      "post-processing the sampled free-energy gradients.\n",
+                      COLVARS_NOT_IMPLEMENTED);
+  }
+
+  error_code |= init_grids();
+  if (is_enabled(f_cvb_write_ti_pmf)) {
+    enable(f_cvb_write_ti_samples);
+  }
+
+  if (! is_enabled(f_cvb_calc_ti_samples)) {
+    return error_code;
+  }
+
+  // Other time-dependent biases require subtractAppliedForces on all variables
+  std::vector<std::string> const time_biases =
+    cvm::main()->time_dependent_biases();
+  bool const only_this_bias =
+    (time_biases.size() == 1) && (time_biases[0] == this->name);
+  if ((time_biases.size() > 0) && !only_this_bias) {
+    for (size_t icv = 0; icv < num_variables(); icv++) {
+      if (variables(icv)->is_enabled(f_cv_subtract_applied_force)) continue;
+      return cvm::error("Error: cannot collect TI samples while other "
+                        "time-dependent biases are active and not all "
+                        "variables have subtractAppliedForces on.\n",
+                        INPUT_ERROR);
+    }
+  }
+
+  return error_code;
+}
+
+
+int colvarbias_ti::init_grids()
+{
+  // Nothing to do if TI sampling is off or the grids already exist
+  if (! is_enabled(f_cvb_calc_ti_samples) || (ti_avg_forces != NULL)) {
+    return COLVARS_OK;
+  }
+  ti_bin.resize(num_variables());
+  ti_system_forces.resize(num_variables());
+  for (size_t i = 0; i < num_variables(); i++) {
+    colvarvalue &force = ti_system_forces[i];
+    force.type(variables(i)->value());
+    force.is_derivative();
+    force.reset();
+  }
+  ti_avg_forces = new colvar_grid_gradient(colvars);
+  ti_count = new colvar_grid_count(colvars);
+  ti_avg_forces->samples = ti_count;
+  ti_count->has_parent_data = true;
+  return COLVARS_OK;
+}
+
+/// Collect a TI sample; NULL selects previous_colvar_forces for subtraction
+int colvarbias_ti::update()
+{
+  return update_system_forces(NULL);
+}
+
+/// Bin the variables and accumulate their system forces into the TI grids
+int colvarbias_ti::update_system_forces(std::vector<colvarvalue> const
+                                        *subtract_forces)
+{
+  if (! is_enabled(f_cvb_calc_ti_samples)) {
+    return COLVARS_OK;
+  }
+  // write_output_files() returns early unless this flag is set
+  has_data = true;
+
+  if (cvm::debug()) {
+    cvm::log("Updating system forces for bias "+this->name+"\n");
+  }
+
+  colvarproxy *proxy = cvm::main()->proxy;
+
+  size_t i;
+  // Total forces belong to this step: use the bins of the current step
+  if (proxy->total_forces_same_step()) {
+    for (i = 0; i < num_variables(); i++) {
+      ti_bin[i] = ti_avg_forces->current_bin_scalar(i);
+    }
+  }
+
+  // Collect total forces (skipped at the first step when they lag one step)
+  if ((cvm::step_relative() > 0) || proxy->total_forces_same_step()) {
+    if (ti_avg_forces->index_ok(ti_bin)) {
+      for (i = 0; i < num_variables(); i++) {
+        if (variables(i)->is_enabled(f_cv_subtract_applied_force)) {
+          // this colvar is already subtracting all applied forces
+          ti_system_forces[i] = variables(i)->total_force();
+        } else {
+          ti_system_forces[i] = variables(i)->total_force() -
+            ((subtract_forces != NULL) ?
+             (*subtract_forces)[i] : previous_colvar_forces[i]);
+        }
+      }
+      ti_avg_forces->acc_value(ti_bin, ti_system_forces);
+    }
+  }
+
+  if (!proxy->total_forces_same_step()) {
+    // Set the index for use in the next iteration, when total forces come in
+    for (i = 0; i < num_variables(); i++) {
+      ti_bin[i] = ti_avg_forces->current_bin_scalar(i);
+    }
+  }
+
+  return COLVARS_OK;
+}
+
+/// The TI estimator contributes no state parameters: returns an empty string
+std::string const colvarbias_ti::get_state_params() const
+{
+  return std::string("");
+}
+
+/// Nothing to parse for the TI estimator: state_conf is ignored
+int colvarbias_ti::set_state_params(std::string const &state_conf)
+{
+  return COLVARS_OK;
+}
+
+
+// Write the sample histogram and the averaged forces as raw grid data
+std::ostream & colvarbias_ti::write_state_data(std::ostream &os)
+{
+  if (is_enabled(f_cvb_calc_ti_samples)) {
+    os << "\nhistogram\n";
+    ti_count->write_raw(os);
+    os << "\nsystem_forces\n";
+    ti_avg_forces->write_raw(os);
+  }
+  return os;
+}
+
+
+// Read back the data written by write_state_data(); on any failure the
+// stream is returned as-is, without further reads
+std::istream & colvarbias_ti::read_state_data(std::istream &is)
+{
+  if (! is_enabled(f_cvb_calc_ti_samples)) {
+    return is;
+  }
+
+  if (cvm::debug()) {
+    cvm::log("Reading state data for the TI estimator.\n");
+  }
+
+  // Evaluation stops at the first keyword or grid that cannot be read
+  bool const all_read =
+    read_state_data_key(is, "histogram") &&
+    ti_count->read_raw(is) &&
+    read_state_data_key(is, "system_forces") &&
+    ti_avg_forces->read_raw(is);
+
+  if (all_read && cvm::debug()) {
+    cvm::log("Done reading state data for the TI estimator.\n");
+  }
+
+  return is;
+}
+
+/// Write the TI histogram and gradients, and/or the 1D PMF, if data exist
+int colvarbias_ti::write_output_files()
+{
+  if (!has_data) {
+    // nothing to write
+    return COLVARS_OK;
+  }
+
+  std::string const ti_output_prefix = cvm::output_prefix()+"."+this->name;
+
+  std::ostream *os = NULL;
+  // Each file is written only if the proxy returns a valid stream for it
+  if (is_enabled(f_cvb_write_ti_samples)) {
+    std::string const ti_count_file_name(ti_output_prefix+".ti.count");
+    os = cvm::proxy->output_stream(ti_count_file_name);
+    if (os) {
+      ti_count->write_multicol(*os);
+      cvm::proxy->close_output_stream(ti_count_file_name);
+    }
+
+    std::string const ti_grad_file_name(ti_output_prefix+".ti.grad");
+    os = cvm::proxy->output_stream(ti_grad_file_name);
+    if (os) {
+      ti_avg_forces->write_multicol(*os);
+      cvm::proxy->close_output_stream(ti_grad_file_name);
+    }
+  }
+
+  if (is_enabled(f_cvb_write_ti_pmf)) {
+    std::string const pmf_file_name(ti_output_prefix+".ti.pmf");
+    cvm::log("Writing TI PMF to file \""+pmf_file_name+"\".\n");
+    os = cvm::proxy->output_stream(pmf_file_name);
+    if (os) {
+      // negate to get the FE gradient, integrate it, then restore the sign
+      ti_avg_forces->multiply_constant(-1.0);
+      ti_avg_forces->write_1D_integral(*os);
+      ti_avg_forces->multiply_constant(-1.0);
+      cvm::proxy->close_output_stream(pmf_file_name);
+    }
+  }
+
+  return COLVARS_OK;
+}
+
+
 // Static members
 
 std::vector<colvardeps::feature *> colvarbias::cvb_features;
diff --git a/lib/colvars/colvarbias.h b/lib/colvars/colvarbias.h
index a147cd3210486cc74d80f5e0c0e5f859e0ea1b70..083b9d73036da056a2c76d3cfb8cef600049c1f3 100644
--- a/lib/colvars/colvarbias.h
+++ b/lib/colvars/colvarbias.h
@@ -109,6 +109,9 @@ public:
   /// \brief Delete everything
   virtual int clear();
 
+  /// \brief Delete only the allocatable data (save memory)
+  virtual int clear_state_data();
+
   /// Destructor
   virtual ~colvarbias();
 
@@ -183,6 +186,12 @@ public:
   {
     return cvb_features;
   }
+  /// Delete all feature objects stored in cvb_features, then empty the list
+  static void delete_features() {
+    size_t const num_features = cvb_features.size();
+    for (size_t k = 0; k < num_features; ++k) { delete cvb_features[k]; }
+    cvb_features.clear();
+  }
 
 protected:
 
@@ -194,6 +203,9 @@ protected:
   /// \brief Current forces from this bias to the variables
   std::vector<colvarvalue> colvar_forces;
 
+  /// \brief Forces last applied by this bias to the variables
+  std::vector<colvarvalue> previous_colvar_forces;
+
   /// \brief Current energy of this bias (colvar_forces should be obtained by deriving this)
   cvm::real                bias_energy;
 
@@ -209,4 +221,48 @@ protected:
 
 };
 
+
+class colvar_grid_gradient;
+class colvar_grid_count;
+
+/// \brief Base class for unconstrained thermodynamic-integration FE estimator
+class colvarbias_ti : public virtual colvarbias {
+public:
+  /// Construct with no grids allocated (see init_grids())
+  colvarbias_ti(char const *key);
+  virtual ~colvarbias_ti();
+  /// Delete the grids ti_avg_forces and ti_count
+  virtual int clear_state_data();
+  /// Initialization (keywords, grids) and per-step collection of samples
+  virtual int init(std::string const &conf);
+  virtual int init_grids();
+  virtual int update();
+
+  /// Subtract applied forces (either last forces or argument) from the total
+  /// forces
+  virtual int update_system_forces(std::vector<colvarvalue> const
+                                   *subtract_forces);
+  /// State-file and output-file I/O for the TI data
+  virtual std::string const get_state_params() const;
+  virtual int set_state_params(std::string const &state_conf);
+  virtual std::ostream & write_state_data(std::ostream &os);
+  virtual std::istream & read_state_data(std::istream &is);
+  virtual int write_output_files();
+
+protected:
+
+  /// \brief Forces exerted from the system to the associated variables
+  std::vector<colvarvalue> ti_system_forces;
+
+  /// Averaged system forces (allocated by init_grids())
+  colvar_grid_gradient *ti_avg_forces;
+
+  /// Histogram of sampled data (allocated by init_grids())
+  colvar_grid_count *ti_count;
+
+  /// Grid bin of each variable; kept between steps because total forces
+  /// may refer to the previous simulation step
+  std::vector<int> ti_bin;
+};
+
 #endif
diff --git a/lib/colvars/colvarbias_abf.cpp b/lib/colvars/colvarbias_abf.cpp
index a96fc21d644e750d1a574596e4c3f0b99c87930f..e4aea8eb86e7dcb6712fc6dc2c771adeeb6909f6 100644
--- a/lib/colvars/colvarbias_abf.cpp
+++ b/lib/colvars/colvarbias_abf.cpp
@@ -14,6 +14,8 @@
 
 colvarbias_abf::colvarbias_abf(char const *key)
   : colvarbias(key),
+    b_UI_estimator(false),
+    b_CZAR_estimator(false),
     system_force(NULL),
     gradients(NULL),
     samples(NULL),
@@ -159,6 +161,7 @@ int colvarbias_abf::init(std::string const &conf)
 
   // Data for eABF z-based estimator
   if (b_extended) {
+    get_keyval(conf, "CZARestimator", b_CZAR_estimator, true);
     // CZAR output files for stratified eABF
     get_keyval(conf, "writeCZARwindowFile", b_czar_window_file, false,
                colvarparse::parse_silent);
@@ -187,8 +190,38 @@ int colvarbias_abf::init(std::string const &conf)
     read_gradients_samples();
   }
 
-  cvm::log("Finished ABF setup.\n");
+  // if extendedLagrangian is on, then call UI estimator
+  if (b_extended) {
+    get_keyval(conf, "UIestimator", b_UI_estimator, false);
+
+    if (b_UI_estimator) {
+    std::vector<double> UI_lowerboundary;
+    std::vector<double> UI_upperboundary;
+    std::vector<double> UI_width;
+    std::vector<double> UI_krestr;
+
+    bool UI_restart = (input_prefix.size() > 0);
+
+    for (size_t i = 0; i < colvars.size(); i++)
+    {
+      UI_lowerboundary.push_back(colvars[i]->lower_boundary);
+      UI_upperboundary.push_back(colvars[i]->upper_boundary);
+      UI_width.push_back(colvars[i]->width);
+      UI_krestr.push_back(colvars[i]->force_constant());
+    }
+      eabf_UI = UIestimator::UIestimator(UI_lowerboundary,
+                                         UI_upperboundary,
+                                         UI_width,
+                                         UI_krestr,                // force constant in eABF
+                                         output_prefix,              // the prefix of output files
+                                         cvm::restart_out_freq,
+                                         UI_restart,                    // whether restart from a .count and a .grad file
+                                         input_prefix,   // the prefixes of input files
+                                         cvm::temperature());
+    }
+  }
 
+  cvm::log("Finished ABF setup.\n");
   return COLVARS_OK;
 }
 
@@ -271,6 +304,10 @@ int colvarbias_abf::update()
         // and subtract previous ABF force if necessary
         update_system_force(i);
       }
+      if (cvm::proxy->total_forces_same_step()) {
+        // e.g. in LAMMPS, total forces are current
+        force_bin = bin;
+      }
       gradients->acc_force(force_bin, system_force);
     }
     if ( z_gradients && update_bias ) {
@@ -288,8 +325,11 @@ int colvarbias_abf::update()
     }
   }
 
-  // save bin for next timestep
-  force_bin = bin;
+  if (!cvm::proxy->total_forces_same_step()) {
+    // e.g. in NAMD, total forces will be available for next timestep
+    // hence we store the current colvar bin
+    force_bin = bin;
+  }
 
   // Reset biasing forces from previous timestep
   for (size_t i = 0; i < colvars.size(); i++) {
@@ -332,7 +372,7 @@ int colvarbias_abf::update()
   }
 
   // update the output prefix; TODO: move later to setup_output() function
-  if (cvm::num_biases_feature(colvardeps::f_cvb_calc_pmf) == 1) {
+  if (cvm::main()->num_biases_feature(colvardeps::f_cvb_calc_pmf) == 1) {
     // This is the only bias computing PMFs
     output_prefix = cvm::output_prefix();
   } else {
@@ -364,6 +404,20 @@ int colvarbias_abf::update()
     cvm::log("Prepared sample and gradient buffers at step "+cvm::to_str(cvm::step_absolute())+".");
   }
 
+  // update UI estimator every step
+  if (b_UI_estimator)
+  {
+    std::vector<double> x(colvars.size(),0);
+    std::vector<double> y(colvars.size(),0);
+    for (size_t i = 0; i < colvars.size(); i++)
+    {
+      x[i] = colvars[i]->actual_value();
+      y[i] = colvars[i]->value();
+    }
+    eabf_UI.update_output_filename(output_prefix);
+    eabf_UI.update(cvm::step_absolute(), x, y);
+  }
+
   return COLVARS_OK;
 }
 
@@ -479,8 +533,8 @@ void colvarbias_abf::write_gradients_samples(const std::string &prefix, bool app
     cvm::proxy->close_output_stream(pmf_out_name);
   }
 
-  if (z_gradients) {
-    // Write eABF-related quantities
+  if (b_CZAR_estimator) {
+    // Write eABF CZAR-related quantities
 
     std::string  z_samples_out_name = prefix + ".zcount";
 
@@ -588,7 +642,7 @@ void colvarbias_abf::read_gradients_samples()
       is.close();
     }
 
-    if (z_gradients) {
+    if (b_CZAR_estimator) {
       // Read eABF z-averaged data for CZAR
       cvm::log("Reading z-histogram from " + z_samples_in_name + " and z-gradient from " + z_gradients_in_name);
 
@@ -621,7 +675,7 @@ std::ostream & colvarbias_abf::write_state_data(std::ostream& os)
   os << "\ngradient\n";
   gradients->write_raw(os, 8);
 
-  if (z_gradients) {
+  if (b_CZAR_estimator) {
     os.setf(std::ios::fmtflags(0), std::ios::floatfield); // default floating-point format
     os << "\nz_samples\n";
     z_samples->write_raw(os, 8);
@@ -655,7 +709,7 @@ std::istream & colvarbias_abf::read_state_data(std::istream& is)
     return is;
   }
 
-  if (z_gradients) {
+  if (b_CZAR_estimator) {
 
     if (! read_state_data_key(is, "z_samples")) {
       return is;
diff --git a/lib/colvars/colvarbias_abf.h b/lib/colvars/colvarbias_abf.h
index 41a5475fa70087ca709e4ca2be9f0f118874a311..1defe72268c487e82a536d2f5e441e1067897272 100644
--- a/lib/colvars/colvarbias_abf.h
+++ b/lib/colvars/colvarbias_abf.h
@@ -17,6 +17,7 @@
 
 #include "colvarbias.h"
 #include "colvargrid.h"
+#include "colvar_UIestimator.h"
 
 typedef cvm::real* gradient_t;
 
@@ -50,6 +51,12 @@ private:
   /// Write CZAR output file for stratified eABF (.zgrad)
   bool      b_czar_window_file;
   size_t    history_freq;
+  /// Umbrella Integration estimator of free energy from eABF
+  UIestimator::UIestimator eabf_UI;
+  // Run UI estimator?
+  bool b_UI_estimator;
+  // Run CZAR estimator?
+  bool b_CZAR_estimator;
 
   /// Cap applied biasing force?
   bool                    cap_force;
diff --git a/lib/colvars/colvarbias_meta.cpp b/lib/colvars/colvarbias_meta.cpp
index 66806fc9fca01d57a596344ff2fd62a120272442..b0d154dfc9ebf591f49b15ba517f4bcf7ba10317 100644
--- a/lib/colvars/colvarbias_meta.cpp
+++ b/lib/colvars/colvarbias_meta.cpp
@@ -33,7 +33,7 @@
 
 
 colvarbias_meta::colvarbias_meta(char const *key)
-  : colvarbias(key)
+  : colvarbias(key), colvarbias_ti(key)
 {
   new_hills_begin = hills.end();
   hills_traj_os = NULL;
@@ -44,6 +44,7 @@ colvarbias_meta::colvarbias_meta(char const *key)
 int colvarbias_meta::init(std::string const &conf)
 {
   colvarbias::init(conf);
+  colvarbias_ti::init(conf);
 
   enable(f_cvb_calc_pmf);
 
@@ -104,7 +105,7 @@ int colvarbias_meta::init(std::string const &conf)
       get_keyval(conf, "dumpFreeEnergyFile", dump_fes, true, colvarparse::parse_silent);
     if (get_keyval(conf, "saveFreeEnergyFile", dump_fes_save, false, colvarparse::parse_silent)) {
       cvm::log("Option \"saveFreeEnergyFile\" is deprecated, "
-               "please use \"keepFreeEnergyFile\" instead.");
+               "please use \"keepFreeEnergyFiles\" instead.");
     }
     get_keyval(conf, "keepFreeEnergyFiles", dump_fes_save, dump_fes_save);
 
@@ -230,15 +231,7 @@ int colvarbias_meta::init_ebmeta_params(std::string const &conf)
 
 colvarbias_meta::~colvarbias_meta()
 {
-  if (hills_energy) {
-    delete hills_energy;
-    hills_energy = NULL;
-  }
-
-  if (hills_energy_gradients) {
-    delete hills_energy_gradients;
-    hills_energy_gradients = NULL;
-  }
+  colvarbias_meta::clear_state_data();
 
   if (replica_hills_os) {
     cvm::proxy->close_output_stream(replica_hills_file);
@@ -250,13 +243,31 @@ colvarbias_meta::~colvarbias_meta()
     hills_traj_os = NULL;
   }
 
-  if(target_dist) {
+  if (target_dist) {
     delete target_dist;
     target_dist = NULL;
   }
 }
 
 
+// Free the hills' energy and gradient grids, and empty the lists of hills.
+// Other members are left untouched.
+int colvarbias_meta::clear_state_data()
+{
+  // Deleting a NULL pointer is a no-op, so the grids need no guards
+  delete hills_energy;
+  hills_energy = NULL;
+
+  delete hills_energy_gradients;
+  hills_energy_gradients = NULL;
+
+  // Empty both lists of hills
+  hills.clear();
+  hills_off_grid.clear();
+
+  return COLVARS_OK;
+}
+
 
 // **********************************************************************
 // Hill management member functions
@@ -336,6 +347,9 @@ int colvarbias_meta::update()
   // update base class
   error_code |= colvarbias::update();
 
+  // update the TI estimator (if defined)
+  error_code |= colvarbias_ti::update();
+
   // update grid definition, if needed
   error_code |= update_grid_params();
   // add new biasing energy/forces
@@ -1000,6 +1014,10 @@ void colvarbias_meta::update_replicas_registry()
           (replicas.back())->hills_energy           = new colvar_grid_scalar(colvars);
           (replicas.back())->hills_energy_gradients = new colvar_grid_gradient(colvars);
         }
+        if (is_enabled(f_cvb_calc_ti_samples)) {
+          (replicas.back())->enable(f_cvb_calc_ti_samples);
+          (replicas.back())->colvarbias_ti::init_grids();
+        }
       }
     }
   } else {
@@ -1374,6 +1392,8 @@ std::istream & colvarbias_meta::read_state_data(std::istream& is)
     }
   }
 
+  colvarbias_ti::read_state_data(is);
+
   if (cvm::debug())
     cvm::log("colvarbias_meta::read_restart() done\n");
 
@@ -1474,7 +1494,7 @@ std::istream & colvarbias_meta::read_hill(std::istream &is)
 int colvarbias_meta::setup_output()
 {
   output_prefix = cvm::output_prefix();
-  if (cvm::num_biases_feature(colvardeps::f_cvb_calc_pmf) > 1) {
+  if (cvm::main()->num_biases_feature(colvardeps::f_cvb_calc_pmf) > 1) {
     // if this is not the only free energy integrator, append
     // this bias's name, to distinguish it from the output of the other
     // biases producing a .pmf file
@@ -1631,6 +1651,7 @@ std::ostream & colvarbias_meta::write_state_data(std::ostream& os)
     }
   }
 
+  colvarbias_ti::write_state_data(os);
   return os;
 }
 
@@ -1651,6 +1672,7 @@ int colvarbias_meta::write_state_to_replicas()
 
 int colvarbias_meta::write_output_files()
 {
+  colvarbias_ti::write_output_files();
   if (dump_fes) {
     write_pmf();
   }
diff --git a/lib/colvars/colvarbias_meta.h b/lib/colvars/colvarbias_meta.h
index 249f7342bc4b982b3747c56a11f9362b63914a41..78b2d35d41bb581d9b6da2297cf5886285415de5 100644
--- a/lib/colvars/colvarbias_meta.h
+++ b/lib/colvars/colvarbias_meta.h
@@ -19,7 +19,10 @@
 #include "colvargrid.h"
 
 /// Metadynamics bias (implementation of \link colvarbias \endlink)
-class colvarbias_meta : public colvarbias {
+class colvarbias_meta 
+  : public virtual colvarbias, 
+    public virtual colvarbias_ti
+{
 
 public:
 
@@ -35,10 +38,13 @@ public:
   Communication comm;
 
   colvarbias_meta(char const *key);
+  virtual ~colvarbias_meta();
+
   virtual int init(std::string const &conf);
   virtual int init_well_tempered_params(std::string const &conf);
   virtual int init_ebmeta_params(std::string const &conf);
-  virtual ~colvarbias_meta();
+
+  virtual int clear_state_data();
 
   virtual int update();
   virtual int update_grid_params();
diff --git a/lib/colvars/colvarbias_restraint.cpp b/lib/colvars/colvarbias_restraint.cpp
index 70beca29fa79eb25b328c0afa1d3c8cefdf8226a..23534f56eb43c1cd0a2e257ed8581449026d0c8f 100644
--- a/lib/colvars/colvarbias_restraint.cpp
+++ b/lib/colvars/colvarbias_restraint.cpp
@@ -14,7 +14,7 @@
 
 
 colvarbias_restraint::colvarbias_restraint(char const *key)
-  : colvarbias(key)
+  : colvarbias(key), colvarbias_ti(key)
 {
 }
 
@@ -24,6 +24,8 @@ int colvarbias_restraint::init(std::string const &conf)
   colvarbias::init(conf);
   enable(f_cvb_apply_force);
 
+  colvarbias_ti::init(conf);
+
   if (cvm::debug())
     cvm::log("Initializing a new restraint bias.\n");
 
@@ -86,7 +88,7 @@ std::ostream & colvarbias_restraint::write_traj(std::ostream &os)
 
 
 colvarbias_restraint_centers::colvarbias_restraint_centers(char const *key)
-  : colvarbias(key), colvarbias_restraint(key)
+  : colvarbias(key), colvarbias_ti(key), colvarbias_restraint(key)
 {
 }
 
@@ -145,7 +147,7 @@ int colvarbias_restraint_centers::change_configuration(std::string const &conf)
 
 
 colvarbias_restraint_k::colvarbias_restraint_k(char const *key)
-  : colvarbias(key), colvarbias_restraint(key)
+  : colvarbias(key), colvarbias_ti(key), colvarbias_restraint(key)
 {
   force_k = -1.0;
 }
@@ -237,6 +239,7 @@ int colvarbias_restraint_moving::set_state_params(std::string const &conf)
 
 colvarbias_restraint_centers_moving::colvarbias_restraint_centers_moving(char const *key)
   : colvarbias(key),
+    colvarbias_ti(key),
     colvarbias_restraint(key),
     colvarbias_restraint_centers(key),
     colvarbias_restraint_moving(key)
@@ -284,14 +287,17 @@ int colvarbias_restraint_centers_moving::init(std::string const &conf)
                                  target_centers[i],
                                  0.5);
     }
+
+    get_keyval(conf, "outputAccumulatedWork", b_output_acc_work,
+               b_output_acc_work); // TODO this conflicts with stages
+
   } else {
     target_centers.clear();
-    return COLVARS_OK;
   }
 
+  // Output restraint centers even when they do not change; some NAMD REUS
+  // scripts expect this behavior
   get_keyval(conf, "outputCenters", b_output_centers, b_output_centers);
-  get_keyval(conf, "outputAccumulatedWork", b_output_acc_work,
-             b_output_acc_work); // TODO this conflicts with stages
 
   return COLVARS_OK;
 }
@@ -475,6 +481,7 @@ std::ostream & colvarbias_restraint_centers_moving::write_traj(std::ostream &os)
 
 colvarbias_restraint_k_moving::colvarbias_restraint_k_moving(char const *key)
   : colvarbias(key),
+    colvarbias_ti(key),
     colvarbias_restraint(key),
     colvarbias_restraint_k(key),
     colvarbias_restraint_moving(key)
@@ -712,6 +719,7 @@ std::ostream & colvarbias_restraint::write_state(std::ostream &os)
 
 colvarbias_restraint_harmonic::colvarbias_restraint_harmonic(char const *key)
   : colvarbias(key),
+    colvarbias_ti(key),
     colvarbias_restraint(key),
     colvarbias_restraint_centers(key),
     colvarbias_restraint_moving(key),
@@ -743,17 +751,22 @@ int colvarbias_restraint_harmonic::init(std::string const &conf)
 
 int colvarbias_restraint_harmonic::update()
 {
+  int error_code = COLVARS_OK;
+
+  // update the TI estimator (if defined)
+  error_code |= colvarbias_ti::update();
+
   // update parameters (centers or force constant)
-  colvarbias_restraint_centers_moving::update();
-  colvarbias_restraint_k_moving::update();
+  error_code |= colvarbias_restraint_centers_moving::update();
+  error_code |= colvarbias_restraint_k_moving::update();
 
   // update restraint energy and forces
-  colvarbias_restraint::update();
+  error_code |= colvarbias_restraint::update();
 
   // update accumulated work using the current forces
-  colvarbias_restraint_centers_moving::update_acc_work();
+  error_code |= colvarbias_restraint_centers_moving::update_acc_work();
 
-  return COLVARS_OK;
+  return error_code;
 }
 
 
@@ -798,6 +811,18 @@ int colvarbias_restraint_harmonic::set_state_params(std::string const &conf)
 }
 
 
+/// Write only the TI estimator's data (delegates to colvarbias_ti)
+std::ostream & colvarbias_restraint_harmonic::write_state_data(std::ostream &os)
+{
+  return colvarbias_ti::write_state_data(os);
+}
+
+/// Read only the TI estimator's data (delegates to colvarbias_ti)
+std::istream & colvarbias_restraint_harmonic::read_state_data(std::istream &is)
+{
+  return colvarbias_ti::read_state_data(is);
+}
+
 std::ostream & colvarbias_restraint_harmonic::write_traj_label(std::ostream &os)
 {
   colvarbias_restraint::write_traj_label(os);
@@ -845,6 +870,7 @@ cvm::real colvarbias_restraint_harmonic::energy_difference(std::string const &co
 
 colvarbias_restraint_harmonic_walls::colvarbias_restraint_harmonic_walls(char const *key)
   : colvarbias(key),
+    colvarbias_ti(key),
     colvarbias_restraint(key),
     colvarbias_restraint_k(key),
     colvarbias_restraint_moving(key),
@@ -967,11 +993,15 @@ int colvarbias_restraint_harmonic_walls::init(std::string const &conf)
 
 int colvarbias_restraint_harmonic_walls::update()
 {
-  colvarbias_restraint_k_moving::update();
+  int error_code = COLVARS_OK;
 
-  colvarbias_restraint::update();
+  error_code |= colvarbias_ti::update();
 
-  return COLVARS_OK;
+  error_code |= colvarbias_restraint_k_moving::update();
+
+  error_code |= colvarbias_restraint::update();
+
+  return error_code;
 }
 
 
@@ -1065,6 +1095,18 @@ int colvarbias_restraint_harmonic_walls::set_state_params(std::string const &con
 }
 
 
+/// Write only the TI estimator's data (delegates to colvarbias_ti)
+std::ostream & colvarbias_restraint_harmonic_walls::write_state_data(std::ostream &os)
+{
+  return colvarbias_ti::write_state_data(os);
+}
+
+/// Read only the TI estimator's data (delegates to colvarbias_ti)
+std::istream & colvarbias_restraint_harmonic_walls::read_state_data(std::istream &is)
+{
+  return colvarbias_ti::read_state_data(is);
+}
+
 std::ostream & colvarbias_restraint_harmonic_walls::write_traj_label(std::ostream &os)
 {
   colvarbias_restraint::write_traj_label(os);
@@ -1084,6 +1126,7 @@ std::ostream & colvarbias_restraint_harmonic_walls::write_traj(std::ostream &os)
 
 colvarbias_restraint_linear::colvarbias_restraint_linear(char const *key)
   : colvarbias(key),
+    colvarbias_ti(key),
     colvarbias_restraint(key),
     colvarbias_restraint_centers(key),
     colvarbias_restraint_moving(key),
@@ -1120,17 +1163,22 @@ int colvarbias_restraint_linear::init(std::string const &conf)
 
 int colvarbias_restraint_linear::update()
 {
+  int error_code = COLVARS_OK;
+
+  // update the TI estimator (if defined)
+  error_code |= colvarbias_ti::update();
+
   // update parameters (centers or force constant)
-  colvarbias_restraint_centers_moving::update();
-  colvarbias_restraint_k_moving::update();
+  error_code |= colvarbias_restraint_centers_moving::update();
+  error_code |= colvarbias_restraint_k_moving::update();
 
   // update restraint energy and forces
-  colvarbias_restraint::update();
+  error_code |= colvarbias_restraint::update();
 
   // update accumulated work using the current forces
-  colvarbias_restraint_centers_moving::update_acc_work();
+  error_code |= colvarbias_restraint_centers_moving::update_acc_work();
 
-  return COLVARS_OK;
+  return error_code;
 }
 
 
@@ -1196,6 +1244,18 @@ int colvarbias_restraint_linear::set_state_params(std::string const &conf)
 }
 
 
+/// Write only the TI estimator's data (delegates to colvarbias_ti)
+std::ostream & colvarbias_restraint_linear::write_state_data(std::ostream &os)
+{
+  return colvarbias_ti::write_state_data(os);
+}
+
+/// Read only the TI estimator's data (delegates to colvarbias_ti)
+std::istream & colvarbias_restraint_linear::read_state_data(std::istream &is)
+{
+  return colvarbias_ti::read_state_data(is);
+}
+
 std::ostream & colvarbias_restraint_linear::write_traj_label(std::ostream &os)
 {
   colvarbias_restraint::write_traj_label(os);
diff --git a/lib/colvars/colvarbias_restraint.h b/lib/colvars/colvarbias_restraint.h
index 8c3a1537fc881cbca3dc8ef66fcf3caeb4d49c90..b10649cab112f8a10bfedaff0c10823cc7a43f04 100644
--- a/lib/colvars/colvarbias_restraint.h
+++ b/lib/colvars/colvarbias_restraint.h
@@ -16,7 +16,8 @@
 /// see derived classes for specific types
 /// (implementation of \link colvarbias \endlink)
 class colvarbias_restraint
-  : public virtual colvarbias
+  : public virtual colvarbias,
+    public virtual colvarbias_ti
 {
 
 public:
@@ -95,7 +96,7 @@ protected:
 
 /// Options to change the restraint configuration over time (shared between centers and k moving)
 class colvarbias_restraint_moving
-  : public virtual colvarparse {
+  : public virtual colvarparse, public virtual colvardeps {
 public:
 
   colvarbias_restraint_moving(char const *key);
@@ -226,6 +227,8 @@ public:
   virtual int update();
   virtual std::string const get_state_params() const;
   virtual int set_state_params(std::string const &conf);
+  virtual std::ostream & write_state_data(std::ostream &os);
+  virtual std::istream & read_state_data(std::istream &is);
   virtual std::ostream & write_traj_label(std::ostream &os);
   virtual std::ostream & write_traj(std::ostream &os);
   virtual int change_configuration(std::string const &conf);
@@ -252,6 +255,8 @@ public:
   virtual void communicate_forces();
   virtual std::string const get_state_params() const;
   virtual int set_state_params(std::string const &conf);
+  virtual std::ostream & write_state_data(std::ostream &os);
+  virtual std::istream & read_state_data(std::istream &is);
   virtual std::ostream & write_traj_label(std::ostream &os);
   virtual std::ostream & write_traj(std::ostream &os);
 
@@ -292,6 +297,8 @@ public:
 
   virtual std::string const get_state_params() const;
   virtual int set_state_params(std::string const &conf);
+  virtual std::ostream & write_state_data(std::ostream &os);
+  virtual std::istream & read_state_data(std::istream &is);
   virtual std::ostream & write_traj_label(std::ostream &os);
   virtual std::ostream & write_traj(std::ostream &os);
 
diff --git a/lib/colvars/colvarcomp.h b/lib/colvars/colvarcomp.h
index 3c1ec2495c9c5af29c5549b9c9ddaa355d2f1502..b94d798be9f0a3cd49abc27bb8cdaf41f88e8c87 100644
--- a/lib/colvars/colvarcomp.h
+++ b/lib/colvars/colvarcomp.h
@@ -140,7 +140,12 @@ public:
   {
     return cvc_features;
   }
-
+  static void delete_features() { // free every element of cvc_features, then empty it
+    for (size_t i=0; i < cvc_features.size(); i++) {
+      delete cvc_features[i];
+    }
+    cvc_features.clear(); // the stored pointers are dangling after the loop above
+  }
 
   /// \brief Obtain data needed for the calculation for the backend
   virtual void read_data();
diff --git a/lib/colvars/colvarcomp_coordnums.cpp b/lib/colvars/colvarcomp_coordnums.cpp
index 369d489e279c04d5051d8fef83ca0d16e5cf67ed..c34dc772157c64063627759370a8a936a2d46c13 100644
--- a/lib/colvars/colvarcomp_coordnums.cpp
+++ b/lib/colvars/colvarcomp_coordnums.cpp
@@ -18,6 +18,7 @@
 
 
 
+
 template<bool calculate_gradients>
 cvm::real colvar::coordnum::switching_function(cvm::real const &r0,
                                                int const &en,
@@ -32,8 +33,8 @@ cvm::real colvar::coordnum::switching_function(cvm::real const &r0,
   int const en2 = en/2;
   int const ed2 = ed/2;
 
-  cvm::real const xn = std::pow(l2, en2);
-  cvm::real const xd = std::pow(l2, ed2);
+  cvm::real const xn = cvm::integer_power(l2, en2);
+  cvm::real const xd = cvm::integer_power(l2, ed2);
   cvm::real const func = (1.0-xn)/(1.0-xd);
 
   if (calculate_gradients) {
@@ -62,8 +63,8 @@ cvm::real colvar::coordnum::switching_function(cvm::rvector const &r0_vec,
   int const en2 = en/2;
   int const ed2 = ed/2;
 
-  cvm::real const xn = std::pow(l2, en2);
-  cvm::real const xd = std::pow(l2, ed2);
+  cvm::real const xn = cvm::integer_power(l2, en2);
+  cvm::real const xd = cvm::integer_power(l2, ed2);
   cvm::real const func = (1.0-xn)/(1.0-xd);
 
   if (calculate_gradients) {
@@ -87,6 +88,12 @@ colvar::coordnum::coordnum(std::string const &conf)
   group1 = parse_group(conf, "group1");
   group2 = parse_group(conf, "group2");
 
+  if (int atom_number = cvm::atom_group::overlap(*group1, *group2)) {
+    cvm::error("Error: group1 and group2 share a common atom (number: " +
+      cvm::to_str(atom_number) + ")\n");
+    return;
+  }
+
   if (group1->b_dummy) {
     cvm::error("Error: only group2 is allowed to be a dummy atom\n");
     return;
@@ -111,11 +118,17 @@ colvar::coordnum::coordnum(std::string const &conf)
     if (r0_vec.z < 0.0) r0_vec.z *= -1.0;
   }
 
-  get_keyval(conf, "expNumer", en, int(6) );
-  get_keyval(conf, "expDenom", ed, int(12));
+  get_keyval(conf, "expNumer", en, 6);
+  get_keyval(conf, "expDenom", ed, 12);
 
   if ( (en%2) || (ed%2) ) {
-    cvm::error("Error: odd exponents provided, can only use even ones.\n", INPUT_ERROR);
+    cvm::error("Error: odd exponent(s) provided, can only use even ones.\n",
+               INPUT_ERROR);
+  }
+
+  if ( (en <= 0) || (ed <= 0) ) { // zero is rejected as well as negative values
+    cvm::error("Error: negative or zero exponent(s) provided.\n",
+               INPUT_ERROR);
   }
 
   if (!is_enabled(f_cvc_pbc_minimum_image)) {
@@ -250,8 +263,13 @@ colvar::h_bond::h_bond(std::string const &conf)
   get_keyval(conf, "expDenom", ed, 8);
 
   if ( (en%2) || (ed%2) ) {
-    cvm::error("Error: odd exponents provided, can only use even ones.\n");
-    return;
+    cvm::error("Error: odd exponent(s) provided, can only use even ones.\n",
+               INPUT_ERROR);
+  }
+
+  if ( (en <= 0) || (ed <= 0) ) { // zero is rejected as well as negative values
+    cvm::error("Error: negative or zero exponent(s) provided.\n",
+               INPUT_ERROR);
   }
 
   if (cvm::debug())
@@ -318,12 +336,18 @@ colvar::selfcoordnum::selfcoordnum(std::string const &conf)
   group1 = parse_group(conf, "group1");
 
   get_keyval(conf, "cutoff", r0, cvm::real(4.0 * cvm::unit_angstrom()));
-  get_keyval(conf, "expNumer", en, int(6) );
-  get_keyval(conf, "expDenom", ed, int(12));
+  get_keyval(conf, "expNumer", en, 6);
+  get_keyval(conf, "expDenom", ed, 12);
+
 
   if ( (en%2) || (ed%2) ) {
-    cvm::error("Error: odd exponents provided, can only use even ones.\n");
-    return;
+    cvm::error("Error: odd exponent(s) provided, can only use even ones.\n",
+               INPUT_ERROR);
+  }
+
+  if ( (en <= 0) || (ed <= 0) ) { // zero is rejected as well as negative values
+    cvm::error("Error: negative or zero exponent(s) provided.\n",
+               INPUT_ERROR);
   }
 
   if (!is_enabled(f_cvc_pbc_minimum_image)) {
@@ -401,12 +425,17 @@ colvar::groupcoordnum::groupcoordnum(std::string const &conf)
     if (r0_vec.z < 0.0) r0_vec.z *= -1.0;
   }
 
-  get_keyval(conf, "expNumer", en, int(6) );
-  get_keyval(conf, "expDenom", ed, int(12));
+  get_keyval(conf, "expNumer", en, 6);
+  get_keyval(conf, "expDenom", ed, 12);
 
   if ( (en%2) || (ed%2) ) {
-    cvm::error("Error: odd exponents provided, can only use even ones.\n");
-    return;
+    cvm::error("Error: odd exponent(s) provided, can only use even ones.\n",
+               INPUT_ERROR);
+  }
+
+  if ( (en <= 0) || (ed <= 0) ) { // zero is rejected as well as negative values
+    cvm::error("Error: negative or zero exponent(s) provided.\n",
+               INPUT_ERROR);
   }
 
   if (!is_enabled(f_cvc_pbc_minimum_image)) {
@@ -438,8 +467,8 @@ cvm::real colvar::groupcoordnum::switching_function(cvm::real const &r0,
   int const en2 = en/2;
   int const ed2 = ed/2;
 
-  cvm::real const xn = std::pow(l2, en2);
-  cvm::real const xd = std::pow(l2, ed2);
+  cvm::real const xn = cvm::integer_power(l2, en2);
+  cvm::real const xd = cvm::integer_power(l2, ed2);
   cvm::real const func = (1.0-xn)/(1.0-xd);
 
   if (calculate_gradients) {
@@ -471,8 +500,8 @@ cvm::real colvar::groupcoordnum::switching_function(cvm::rvector const &r0_vec,
   int const en2 = en/2;
   int const ed2 = ed/2;
 
-  cvm::real const xn = std::pow(l2, en2);
-  cvm::real const xd = std::pow(l2, ed2);
+  cvm::real const xn = cvm::integer_power(l2, en2);
+  cvm::real const xd = cvm::integer_power(l2, ed2);
   cvm::real const func = (1.0-xn)/(1.0-xd);
 
   if (calculate_gradients) {
diff --git a/lib/colvars/colvarcomp_distances.cpp b/lib/colvars/colvarcomp_distances.cpp
index 18d154515a3cfee7ade375083f70ab932da50067..ce8055843f93d389117f38926e81d97c66bc7954 100644
--- a/lib/colvars/colvarcomp_distances.cpp
+++ b/lib/colvars/colvarcomp_distances.cpp
@@ -1066,8 +1066,9 @@ void colvar::rmsd::calc_force_invgrads()
 void colvar::rmsd::calc_Jacobian_derivative()
 {
   // divergence of the rotated coordinates (including only derivatives of the rotation matrix)
-  cvm::real divergence = 0.0;
+  cvm::real rotation_term = 0.0;
 
+  // The rotation term only applies if coordinates are rotated
   if (atoms->b_rotate) {
 
     // gradient of the rotation matrix
@@ -1104,7 +1105,7 @@ void colvar::rmsd::calc_Jacobian_derivative()
 
       for (size_t alpha = 0; alpha < 3; alpha++) {
         for (size_t beta = 0; beta < 3; beta++) {
-          divergence += grad_rot_mat[beta][alpha][alpha] * y[beta];
+          rotation_term += grad_rot_mat[beta][alpha][alpha] * y[beta];
         // Note: equation was derived for inverse rotation (see colvars paper)
         // so here the matrix is transposed
         // (eq would give   divergence += grad_rot_mat[alpha][beta][alpha] * y[beta];)
@@ -1112,7 +1113,13 @@ void colvar::rmsd::calc_Jacobian_derivative()
       }
     }
   }
-  jd.real_value = x.real_value > 0.0 ? (3.0 * atoms->size() - 4.0 - divergence) / x.real_value : 0.0;
+
+  // The translation term only applies if coordinates are centered
+  cvm::real translation_term = atoms->b_center ? 3.0 : 0.0;
+
+  jd.real_value = x.real_value > 0.0 ?
+    (3.0 * atoms->size() - 1.0 - translation_term - rotation_term) / x.real_value :
+    0.0;
 }
 
 
diff --git a/lib/colvars/colvarcomp_protein.cpp b/lib/colvars/colvarcomp_protein.cpp
index b8fc96cfad140177e5adaa2ceccea1332b5702c7..91e47f13d90f22885d318eff8f9dc43a8b7fc21f 100644
--- a/lib/colvars/colvarcomp_protein.cpp
+++ b/lib/colvars/colvarcomp_protein.cpp
@@ -150,8 +150,8 @@ void colvar::alpha_angles::calc_value()
       (theta[i])->calc_value();
 
       cvm::real const t = ((theta[i])->value().real_value-theta_ref)/theta_tol;
-      cvm::real const f = ( (1.0 - std::pow(t, (int) 2)) /
-                            (1.0 - std::pow(t, (int) 4)) );
+      cvm::real const f = ( (1.0 - (t*t)) /
+                            (1.0 - (t*t*t*t)) );
 
       x.real_value += theta_norm * f;
 
@@ -202,12 +202,12 @@ void colvar::alpha_angles::apply_force(colvarvalue const &force)
     for (size_t i = 0; i < theta.size(); i++) {
 
       cvm::real const t = ((theta[i])->value().real_value-theta_ref)/theta_tol;
-      cvm::real const f = ( (1.0 - std::pow(t, (int) 2)) /
-                            (1.0 - std::pow(t, (int) 4)) );
+      cvm::real const f = ( (1.0 - (t*t)) /
+                            (1.0 - (t*t*t*t)) );
 
       cvm::real const dfdt =
-        1.0/(1.0 - std::pow(t, (int) 4)) *
-        ( (-2.0 * t) + (-1.0*f)*(-4.0 * std::pow(t, (int) 3)) );
+        1.0/(1.0 - (t*t*t*t)) *
+        ( (-2.0 * t) + (-1.0*f)*(-4.0 * (t*t*t)) );
 
       (theta[i])->apply_force(theta_norm *
                                dfdt * (1.0/theta_tol) *
diff --git a/lib/colvars/colvardeps.cpp b/lib/colvars/colvardeps.cpp
index 8f241a6255953f2ae0d4815a050daa12c077f866..ac906e7be755fc382d1cb383730522da38860092 100644
--- a/lib/colvars/colvardeps.cpp
+++ b/lib/colvars/colvardeps.cpp
@@ -413,15 +413,27 @@ void colvardeps::init_cvb_requires() {
     init_feature(f_cvb_apply_force, "apply force", f_type_user);
     f_req_children(f_cvb_apply_force, f_cv_gradient);
 
-    init_feature(f_cvb_get_total_force, "obtain total force");
+    init_feature(f_cvb_get_total_force, "obtain total force", f_type_dynamic);
     f_req_children(f_cvb_get_total_force, f_cv_total_force);
 
     init_feature(f_cvb_history_dependent, "history-dependent", f_type_static);
 
+    init_feature(f_cvb_time_dependent, "time-dependent", f_type_static);
+
     init_feature(f_cvb_scalar_variables, "require scalar variables", f_type_static);
     f_req_children(f_cvb_scalar_variables, f_cv_scalar);
 
     init_feature(f_cvb_calc_pmf, "calculate a PMF", f_type_static);
+
+    init_feature(f_cvb_calc_ti_samples, "calculate TI samples", f_type_dynamic);
+    f_req_self(f_cvb_calc_ti_samples, f_cvb_get_total_force);
+    f_req_children(f_cvb_calc_ti_samples, f_cv_grid);
+
+    init_feature(f_cvb_write_ti_samples, "write TI samples ", f_type_user);
+    f_req_self(f_cvb_write_ti_samples, f_cvb_calc_ti_samples);
+
+    init_feature(f_cvb_write_ti_pmf, "write TI PMF", f_type_user);
+    f_req_self(f_cvb_write_ti_pmf, f_cvb_calc_ti_samples);
   }
 
   // Initialize feature_states for each instance
@@ -431,6 +443,9 @@ void colvardeps::init_cvb_requires() {
     // Most features are available, so we set them so
     // and list exceptions below
   }
+
+  // only compute TI samples when deriving from colvarbias_ti
+  feature_states[f_cvb_calc_ti_samples].available = false;
 }
 
 
@@ -504,9 +519,6 @@ void colvardeps::init_cv_requires() {
 
     init_feature(f_cv_subtract_applied_force, "subtract applied force from total force", f_type_user);
     f_req_self(f_cv_subtract_applied_force, f_cv_total_force);
-    // There is no well-defined way to implement f_cv_subtract_applied_force
-    // in the case of extended-Lagrangian colvars
-    f_req_exclude(f_cv_subtract_applied_force, f_cv_extended_Lagrangian);
 
     init_feature(f_cv_lower_boundary, "lower boundary", f_type_user);
     f_req_self(f_cv_lower_boundary, f_cv_scalar);
@@ -514,7 +526,7 @@ void colvardeps::init_cv_requires() {
     init_feature(f_cv_upper_boundary, "upper boundary", f_type_user);
     f_req_self(f_cv_upper_boundary, f_cv_scalar);
 
-    init_feature(f_cv_grid, "grid", f_type_user);
+    init_feature(f_cv_grid, "grid", f_type_dynamic);
     f_req_self(f_cv_grid, f_cv_lower_boundary);
     f_req_self(f_cv_grid, f_cv_upper_boundary);
 
@@ -693,7 +705,6 @@ void colvardeps::print_state() {
 }
 
 
-
 void colvardeps::add_child(colvardeps *child) {
 
   children.push_back(child);
diff --git a/lib/colvars/colvardeps.h b/lib/colvars/colvardeps.h
index dfb10d00e421f7635fe4bce5ec88b5f45cfc39eb..bd892fbca8cef746bb4dfad818b1e4577507a735 100644
--- a/lib/colvars/colvardeps.h
+++ b/lib/colvars/colvardeps.h
@@ -180,8 +180,6 @@ public:
 
 protected:
 
-
-
   /// Parse a keyword and enable a feature accordingly
   bool get_keyval_feature(colvarparse *cvp,
                           std::string const &conf, char const *key,
@@ -229,10 +227,18 @@ public:
     f_cvb_get_total_force,
     /// \brief depends on simulation history
     f_cvb_history_dependent,
+    /// \brief depends on time
+    f_cvb_time_dependent,
     /// \brief requires scalar colvars
     f_cvb_scalar_variables,
     /// \brief whether this bias will compute a PMF
     f_cvb_calc_pmf,
+    /// \brief whether this bias will compute TI samples
+    f_cvb_calc_ti_samples,
+    /// \brief whether this bias will write TI samples
+    f_cvb_write_ti_samples,
+    /// \brief whether this bias should write the TI PMF
+    f_cvb_write_ti_pmf,
     f_cvb_ntot
   };
 
diff --git a/lib/colvars/colvargrid.h b/lib/colvars/colvargrid.h
index 6f06cb1066fffbc4ad8982a643264f496264657a..a01104dba819c2745a9d63d434b8e6832c9d85b7 100644
--- a/lib/colvars/colvargrid.h
+++ b/lib/colvars/colvargrid.h
@@ -1403,6 +1403,15 @@ public:
   /// Constructor from a vector of colvars
   colvar_grid_gradient(std::vector<colvar *>  &colvars);
 
+  /// \brief Add values[imult].real_value to each of the mult entries at grid point ix; if a samples grid is set, increment its count at ix
+  inline void acc_value(std::vector<int> const &ix, std::vector<colvarvalue> const &values) {
+    for (size_t imult = 0; imult < mult; imult++) {
+      data[address(ix) + imult] += values[imult].real_value;
+    }
+    if (samples)
+      samples->incr_count(ix);
+  }
+
   /// \brief Accumulate the gradient
   inline void acc_grad(std::vector<int> const &ix, cvm::real const *grads) {
     for (size_t imult = 0; imult < mult; imult++) {
diff --git a/lib/colvars/colvarmodule.cpp b/lib/colvars/colvarmodule.cpp
index 780dc28afaebd4c6db1788c0495e73f8b7904d5b..200c2d6848f10cf6a52ab70745c6049c9387dff7 100644
--- a/lib/colvars/colvarmodule.cpp
+++ b/lib/colvars/colvarmodule.cpp
@@ -22,7 +22,7 @@
 #include "colvarbias_restraint.h"
 #include "colvarscript.h"
 #include "colvaratoms.h"
-
+#include "colvarcomp.h"
 
 colvarmodule::colvarmodule(colvarproxy *proxy_in)
 {
@@ -274,9 +274,9 @@ int colvarmodule::parse_global_params(std::string const &conf)
   parse->get_keyval(conf, "colvarsRestartFrequency",
                     restart_out_freq, restart_out_freq);
 
-  // if this is true when initializing, it means
-  // we are continuing after a reset(): default to true
-  parse->get_keyval(conf, "colvarsTrajAppend", cv_traj_append, cv_traj_append);
+  // Deprecate append flag
+  parse->get_keyval(conf, "colvarsTrajAppend",
+                    cv_traj_append, cv_traj_append, colvarparse::parse_silent);
 
   parse->get_keyval(conf, "scriptedColvarForces", use_scripted_forces, false);
 
@@ -409,22 +409,12 @@ int colvarmodule::parse_biases(std::string const &conf)
     cvm::decrease_depth();
   }
 
-  size_t i;
-
-  size_t n_hist_dep_biases = 0;
-  std::vector<std::string> hist_dep_biases_names;
-  for (i = 0; i < biases.size(); i++) {
-    if (biases[i]->is_enabled(colvardeps::f_cvb_apply_force) &&
-        biases[i]->is_enabled(colvardeps::f_cvb_history_dependent)) {
-      n_hist_dep_biases++;
-      hist_dep_biases_names.push_back(biases[i]->name);
-    }
-  }
-  if (n_hist_dep_biases > 1) {
-    cvm::log("WARNING: there are "+cvm::to_str(n_hist_dep_biases)+
-             " history-dependent biases with non-zero force parameters:\n"+
-             cvm::to_str(hist_dep_biases_names)+"\n"+
-             "Please make sure that their forces do not counteract each other.\n");
+  std::vector<std::string> const time_biases = time_dependent_biases();
+  if (time_biases.size() > 1) {
+    cvm::log("WARNING: there are "+cvm::to_str(time_biases.size())+
+             " time-dependent biases with non-zero force parameters:\n"+
+             cvm::to_str(time_biases)+"\n"+
+             "Please ensure that their forces do not counteract each other.\n");
   }
 
   if (biases.size() || use_scripted_forces) {
@@ -441,7 +431,7 @@ int colvarmodule::parse_biases(std::string const &conf)
 }
 
 
-int colvarmodule::num_biases_feature(int feature_id)
+int colvarmodule::num_biases_feature(int feature_id) const
 {
   colvarmodule *cv = cvm::main();
   size_t n = 0;
@@ -456,7 +446,7 @@ int colvarmodule::num_biases_feature(int feature_id)
 }
 
 
-int colvarmodule::num_biases_type(std::string const &type)
+int colvarmodule::num_biases_type(std::string const &type) const
 {
   colvarmodule *cv = cvm::main();
   size_t n = 0;
@@ -471,6 +461,22 @@ int colvarmodule::num_biases_type(std::string const &type)
 }
 
 
+std::vector<std::string> const colvarmodule::time_dependent_biases() const
+{
+  std::vector<std::string> names; // active, force-applying biases that depend on history or time
+  for (size_t ib = 0; ib < biases.size(); ib++) {
+    colvarbias *b = biases[ib];
+    if (!b->is_enabled(colvardeps::f_cvb_apply_force)) continue;
+    if (!b->is_enabled(colvardeps::f_cvb_active)) continue;
+    if (b->is_enabled(colvardeps::f_cvb_history_dependent) ||
+        b->is_enabled(colvardeps::f_cvb_time_dependent)) {
+      names.push_back(b->name);
+    }
+  }
+  return names;
+}
+
+
 int colvarmodule::catch_input_errors(int result)
 {
   if (result != COLVARS_OK || get_error()) {
@@ -673,8 +679,15 @@ int colvarmodule::calc()
   }
 
   // write restart files, if needed
-  if (restart_out_freq && restart_out_name.size()) {
-    error_code |= write_restart_files();
+  if (restart_out_freq && (cvm::step_relative() > 0) &&
+      ((cvm::step_absolute() % restart_out_freq) == 0) ) {
+    if (restart_out_name.size()) {
+      // Write restart file, if different from main output
+      error_code |= write_restart_file(restart_out_name);
+    } else {
+      error_code |= write_restart_file(output_prefix()+".colvars.state");
+    }
+    error_code |= write_output_files(); // propagate failures instead of discarding them
   }
 
   return error_code;
@@ -916,21 +929,16 @@ int colvarmodule::calc_scripted_forces()
 }
 
 
-int colvarmodule::write_restart_files()
+int colvarmodule::write_restart_file(std::string const &out_name)
 {
-  if ( (cvm::step_relative() > 0) &&
-       ((cvm::step_absolute() % restart_out_freq) == 0) ) {
-    cvm::log("Writing the state file \""+
-             restart_out_name+"\".\n");
-    proxy->backup_file(restart_out_name);
-    std::ostream *restart_out_os = proxy->output_stream(restart_out_name);
-    if (!restart_out_os) return cvm::get_error();
-    if (!write_restart(*restart_out_os)) {
-      return cvm::error("Error: in writing restart file.\n", FILE_ERROR);
-    }
-    proxy->close_output_stream(restart_out_name);
+  cvm::log("Saving collective variables state to \""+out_name+"\".\n");
+  proxy->backup_file(out_name);
+  std::ostream *restart_out_os = proxy->output_stream(out_name);
+  if (!restart_out_os) return cvm::get_error();
+  if (!write_restart(*restart_out_os)) {
+    return cvm::error("Error: in writing restart file.\n", FILE_ERROR);
   }
-
+  proxy->close_output_stream(out_name);
   return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK);
 }
 
@@ -1011,7 +1019,15 @@ colvarmodule::~colvarmodule()
 {
   if ((proxy->smp_thread_id() == COLVARS_NOT_IMPLEMENTED) ||
       (proxy->smp_thread_id() == 0)) {
+
     reset();
+
+    // Delete contents of static arrays
+    colvarbias::delete_features();
+    colvar::delete_features();
+    colvar::cvc::delete_features();
+    atom_group::delete_features();
+
     delete parse;
     parse = NULL;
     proxy = NULL;
@@ -1261,7 +1277,7 @@ continue the previous simulation.\n\n");
 to:\n\
 \""+ proxy->input_prefix()+".colvars.state\"\n");
     output_prefix() = output_prefix()+".tmp";
-    write_output_files();
+    write_restart_file(output_prefix()+".colvars.state");
     cvm::error("Exiting with error until issue is addressed.\n", FATAL_ERROR);
   }
 
@@ -1277,24 +1293,13 @@ int colvarmodule::backup_file(char const *filename)
 
 int colvarmodule::write_output_files()
 {
-  // if this is a simulation run (i.e. not a postprocessing), output data
-  // must be written to be able to restart the simulation
-  std::string const out_name =
-    (output_prefix().size() ?
-     std::string(output_prefix()+".colvars.state") :
-     std::string("colvars.state"));
-  cvm::log("Saving collective variables state to \""+out_name+"\".\n");
-
-  std::ostream * os = proxy->output_stream(out_name);
-  os->setf(std::ios::scientific, std::ios::floatfield);
-  this->write_restart(*os);
-  proxy->close_output_stream(out_name);
+  int error_code = COLVARS_OK;
 
   cvm::increase_depth();
   for (std::vector<colvar *>::iterator cvi = colvars.begin();
        cvi != colvars.end();
        cvi++) {
-    (*cvi)->write_output_files();
+    error_code |= (*cvi)->write_output_files();
   }
   cvm::decrease_depth();
 
@@ -1302,8 +1307,8 @@ int colvarmodule::write_output_files()
   for (std::vector<colvarbias *>::iterator bi = biases.begin();
        bi != biases.end();
        bi++) {
-    (*bi)->write_output_files();
-    (*bi)->write_state_to_replicas();
+    error_code |= (*bi)->write_output_files();
+    error_code |= (*bi)->write_state_to_replicas();
   }
   cvm::decrease_depth();
 
@@ -1403,15 +1408,12 @@ std::ostream & colvarmodule::write_restart(std::ostream &os)
        cvi != colvars.end();
        cvi++) {
     (*cvi)->write_restart(os);
-    error_code |= (*cvi)->write_output_files();
   }
 
   for (std::vector<colvarbias *>::iterator bi = biases.begin();
        bi != biases.end();
        bi++) {
     (*bi)->write_state(os);
-    error_code |= (*bi)->write_state_to_replicas();
-    error_code |= (*bi)->write_output_files();
   }
   cvm::decrease_depth();
 
diff --git a/lib/colvars/colvarmodule.h b/lib/colvars/colvarmodule.h
index 0f6efd14c4ea1b7574304c7bc16f067b735b5076..14e5d56701f75730c7bb1ce07764c89edd0b665b 100644
--- a/lib/colvars/colvarmodule.h
+++ b/lib/colvars/colvarmodule.h
@@ -83,6 +83,15 @@ public:
 
   /// Defining an abstract real number allows to switch precision
   typedef  double    real;
+
+  /// Compute x^n by repeated multiplication instead of std::pow; returns 1.0 if n <= 0
+  static inline real integer_power(real x, int n)
+  {
+    real result = 1.0;
+    for (int i = 0; i < n; i++) result *= x;
+    return result;
+  }
+
   /// Residue identifier
   typedef  int       residue_id;
 
@@ -293,10 +302,13 @@ private:
 public:
 
   /// Return how many biases have this feature enabled
-  static int num_biases_feature(int feature_id);
+  int num_biases_feature(int feature_id) const;
 
   /// Return how many biases are defined with this type
-  static int num_biases_type(std::string const &type);
+  int num_biases_type(std::string const &type) const;
+
+  /// Return the names of time-dependent biases with forces enabled
+  std::vector<std::string> const time_dependent_biases() const;
 
 private:
   /// Useful wrapper to interrupt parsing if any error occurs
@@ -334,9 +346,9 @@ public:
 
   /// Write all trajectory files
   int write_traj_files();
-  /// Write all restart files
-  int write_restart_files();
-  /// Write all FINAL output files
+  /// Write a state file useful to resume the simulation
+  int write_restart_file(std::string const &out_name);
+  /// Write all other output files
   int write_output_files();
   /// Backup a file before writing it
   static int backup_file(char const *filename);
@@ -580,7 +592,7 @@ public:
   /// from static functions in the colvarmodule class
   static colvarproxy *proxy;
 
-  /// \brief Accessor for the above
+  /// \brief Access the one instance of the Colvars module
   static colvarmodule *main();
 
 };
diff --git a/lib/colvars/colvarproxy.cpp b/lib/colvars/colvarproxy.cpp
index fa24091d5233da18a1af36f0dffceca3f89a26fb..8160144c6bab1e11d36f4d1b7712e75bce90e6c8 100644
--- a/lib/colvars/colvarproxy.cpp
+++ b/lib/colvars/colvarproxy.cpp
@@ -10,6 +10,10 @@
 #include <sstream>
 #include <string.h>
 
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
 #include "colvarmodule.h"
 #include "colvarproxy.h"
 #include "colvarscript.h"
@@ -40,6 +44,12 @@ bool colvarproxy_system::total_forces_enabled() const
 }
 
 
+bool colvarproxy_system::total_forces_same_step() const
+{
+  return false;
+}
+
+
 cvm::real colvarproxy_system::position_dist2(cvm::atom_pos const &pos1,
                                              cvm::atom_pos const &pos2)
 {
@@ -204,7 +214,13 @@ void colvarproxy_atom_groups::clear_atom_group(int index)
 
 colvarproxy_smp::colvarproxy_smp()
 {
-  b_smp_active = true;
+  b_smp_active = true; // May be disabled by user option
+  omp_lock_state = NULL; // stays NULL unless a lock is allocated below
+#if defined(_OPENMP)
+  if (smp_thread_id() == 0) { // omp_init_lock() needs real storage: allocate, then initialize
+    omp_init_lock(static_cast<omp_lock_t *>(omp_lock_state = new omp_lock_t));
+  }
+#endif
 }
 
 
@@ -213,60 +229,143 @@ colvarproxy_smp::~colvarproxy_smp() {}
 
 int colvarproxy_smp::smp_enabled()
 {
+#if defined(_OPENMP)
+  if (b_smp_active) {
+    return COLVARS_OK;
+  }
+  return COLVARS_ERROR;
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_smp::smp_colvars_loop()
 {
+#if defined(_OPENMP)
+  colvarmodule *cv = cvm::main();
+  colvarproxy *proxy = cv->proxy;
+#pragma omp parallel for
+  for (size_t i = 0; i < cv->variables_active_smp()->size(); i++) {
+    colvar *x = (*(cv->variables_active_smp()))[i];
+    int x_item = (*(cv->variables_active_smp_items()))[i];
+    if (cvm::debug()) {
+      cvm::log("["+cvm::to_str(proxy->smp_thread_id())+"/"+
+               cvm::to_str(proxy->smp_num_threads())+
+               "]: calc_colvars_items_smp(), i = "+cvm::to_str(i)+", cv = "+
+               x->name+", cvc = "+cvm::to_str(x_item)+"\n");
+    }
+    x->calc_cvcs(x_item, 1);
+  }
+  return cvm::get_error();
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_smp::smp_biases_loop()
 {
+#if defined(_OPENMP)
+  colvarmodule *cv = cvm::main();
+#pragma omp parallel
+  {
+#pragma omp for
+    for (size_t i = 0; i < cv->biases_active()->size(); i++) {
+      colvarbias *b = (*(cv->biases_active()))[i];
+      if (cvm::debug()) {
+        cvm::log("Calculating bias \""+b->name+"\" on thread "+
+                 cvm::to_str(smp_thread_id())+"\n");
+      }
+      b->update();
+    }
+  }
+  return cvm::get_error();
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_smp::smp_biases_script_loop()
 {
+#if defined(_OPENMP)
+  colvarmodule *cv = cvm::main();
+#pragma omp parallel
+  {
+#pragma omp single nowait
+    {
+      cv->calc_scripted_forces();
+    }
+#pragma omp for
+    for (size_t i = 0; i < cv->biases_active()->size(); i++) {
+      colvarbias *b = (*(cv->biases_active()))[i];
+      if (cvm::debug()) {
+        cvm::log("Calculating bias \""+b->name+"\" on thread "+
+                 cvm::to_str(smp_thread_id())+"\n");
+      }
+      b->update();
+    }
+  }
+  return cvm::get_error();
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
+
+
 int colvarproxy_smp::smp_thread_id()
 {
+#if defined(_OPENMP)
+  return omp_get_thread_num();
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_smp::smp_num_threads()
 {
+#if defined(_OPENMP)
+  return omp_get_max_threads();
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_smp::smp_lock()
 {
+#if defined(_OPENMP)
+  omp_set_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state));
+#endif
   return COLVARS_OK;
 }
 
 
 int colvarproxy_smp::smp_trylock()
 {
+#if defined(_OPENMP)
+  return omp_test_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state)) ?
+    COLVARS_OK : COLVARS_ERROR;
+#else
   return COLVARS_OK;
+#endif
 }
 
 
 int colvarproxy_smp::smp_unlock()
 {
+#if defined(_OPENMP)
+  omp_unset_lock(reinterpret_cast<omp_lock_t *>(omp_lock_state));
+#endif
   return COLVARS_OK;
 }
 
 
 
-
 colvarproxy_replicas::colvarproxy_replicas() {}
 
 
diff --git a/lib/colvars/colvarproxy.h b/lib/colvars/colvarproxy.h
index 95d13cd7e05c15ba209333f4a6550e11d29b5977..e51ddfbe3bfc16a9d9a77264b86912a2cd8aa34f 100644
--- a/lib/colvars/colvarproxy.h
+++ b/lib/colvars/colvarproxy.h
@@ -80,6 +80,9 @@ public:
 
   /// Are total forces being used?
   virtual bool total_forces_enabled() const;
+
+  /// Are total forces from the current step available?
+  virtual bool total_forces_same_step() const;
 };
 
 
@@ -372,6 +375,11 @@ public:
 
   /// Release the lock
   virtual int smp_unlock();
+
+protected:
+
+  /// Lock state for OpenMP
+  void *omp_lock_state;
 };
 
 
diff --git a/lib/colvars/colvars_version.h b/lib/colvars/colvars_version.h
index 312c0fd1a0c86e832ccbac15605867ddd101c824..a92a776f8a3abc88700794228bcf4a1e0ad8da51 100644
--- a/lib/colvars/colvars_version.h
+++ b/lib/colvars/colvars_version.h
@@ -1,5 +1,5 @@
 #ifndef COLVARS_VERSION
-#define COLVARS_VERSION "2017-08-06"
+#define COLVARS_VERSION "2017-10-20"
 // This file is part of the Collective Variables module (Colvars).
 // The original version of Colvars and its updates are located at:
 // https://github.com/colvars/colvars
diff --git a/lib/colvars/colvarscript.cpp b/lib/colvars/colvarscript.cpp
index 89302a16a2ead1c29bbc501ec6d8e2aa9d043cb8..9570acd8327252a99e6394f9bd29d6f6a0bd2f4e 100644
--- a/lib/colvars/colvarscript.cpp
+++ b/lib/colvars/colvarscript.cpp
@@ -11,7 +11,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define COLVARSCRIPT_CPP
 #include "colvarscript.h"
+#undef COLVARSCRIPT_CPP
+
 #include "colvarproxy.h"
 #include "colvardeps.h"
 
@@ -21,6 +24,11 @@ colvarscript::colvarscript(colvarproxy *p)
    colvars(p->colvars),
    proxy_error(0)
 {
+  comm_help.resize(colvarscript::cv_n_commands);
+  comm_fns.resize(colvarscript::cv_n_commands);
+#define COLVARSCRIPT_INIT_FN
+#include "colvarscript.h"
+#undef COLVARSCRIPT_INIT_FN
 }
 
 
@@ -66,8 +74,7 @@ int colvarscript::run(int objc, unsigned char *const objv[])
   }
 
   if (objc < 2) {
-    result = help_string();
-    return COLVARS_OK;
+    return exec_command(cv_help, NULL, objc, objv);
   }
 
   std::string const cmd(obj_to_str(objv[1]));
@@ -167,17 +174,7 @@ int colvarscript::run(int objc, unsigned char *const objv[])
 
   /// Parse config from string
   if (cmd == "config") {
-    if (objc < 3) {
-      result = "Missing arguments\n" + help_string();
-      return COLVARSCRIPT_ERROR;
-    }
-    std::string const conf(obj_to_str(objv[2]));
-    if (colvars->read_config_string(conf) == COLVARS_OK) {
-      return COLVARS_OK;
-    } else {
-      result = "Error parsing configuration string";
-      return COLVARSCRIPT_ERROR;
-    }
+    return exec_command(cv_config, NULL, objc, objv);
   }
 
   /// Load an input state file
@@ -204,6 +201,8 @@ int colvarscript::run(int objc, unsigned char *const objv[])
     proxy->output_prefix() = obj_to_str(objv[2]);
     int error = 0;
     error |= colvars->setup_output();
+    error |= colvars->write_restart_file(colvars->output_prefix()+
+                                         ".colvars.state");
     error |= colvars->write_output_files();
     return error ? COLVARSCRIPT_ERROR : COLVARS_OK;
   }
@@ -255,6 +254,10 @@ int colvarscript::run(int objc, unsigned char *const objv[])
     }
   }
 
+  if (cmd == "help") {
+    return exec_command(cv_help, NULL, objc, objv);
+  }
+
   result = "Syntax error\n" + help_string();
   return COLVARSCRIPT_ERROR;
 }
@@ -295,7 +298,9 @@ int colvarscript::proc_colvar(colvar *cv, int objc, unsigned char *const objv[])
     // colvar destructor is tasked with the cleanup
     delete cv;
     // TODO this could be done by the destructors
-    colvars->write_traj_label(*(colvars->cv_traj_os));
+    if (colvars->cv_traj_os != NULL) {
+      colvars->write_traj_label(*(colvars->cv_traj_os));
+    }
     return COLVARS_OK;
   }
 
@@ -374,7 +379,6 @@ int colvarscript::proc_colvar(colvar *cv, int objc, unsigned char *const objv[])
 
 int colvarscript::proc_bias(colvarbias *b, int objc, unsigned char *const objv[]) {
 
-  std::string const key(obj_to_str(objv[0]));
   std::string const subcmd(obj_to_str(objv[2]));
 
   if (subcmd == "energy") {
@@ -425,7 +429,9 @@ int colvarscript::proc_bias(colvarbias *b, int objc, unsigned char *const objv[]
     // the bias destructor takes care of the cleanup at cvm level
     delete b;
     // TODO this could be done by the destructors
-    colvars->write_traj_label(*(colvars->cv_traj_os));
+    if (colvars->cv_traj_os != NULL) {
+      colvars->write_traj_label(*(colvars->cv_traj_os));
+    }
     return COLVARS_OK;
   }
 
@@ -528,7 +534,7 @@ int colvarscript::proc_features(colvardeps *obj,
 }
 
 
-std::string colvarscript::help_string()
+std::string colvarscript::help_string() const
 {
   std::string buf;
   buf = "Usage: cv <subcommand> [args...]\n\
@@ -538,7 +544,7 @@ Managing the Colvars module:\n\
   config <string>             -- read configuration from the given string\n\
   reset                       -- delete all internal configuration\n\
   delete                      -- delete this Colvars module instance\n\
-  version                     -- return version of colvars code\n\
+  version                     -- return version of Colvars code\n\
   \n\
 Input and output:\n\
   list                        -- return a list of all variables\n\
@@ -564,6 +570,8 @@ Accessing collective variables:\n\
   colvar <name> type          -- return the type of colvar <name>\n\
   colvar <name> delete        -- delete colvar <name>\n\
   colvar <name> addforce <F>  -- apply given force on colvar <name>\n\
+  colvar <name> getappliedforce -- return applied force of colvar <name>\n\
+  colvar <name> gettotalforce -- return total force of colvar <name>\n\
   colvar <name> getconfig     -- return config string of colvar <name>\n\
   colvar <name> cvcflags <fl> -- enable or disable cvcs according to 0/1 flags\n\
   colvar <name> get <f>       -- get the value of the colvar feature <f>\n\
diff --git a/lib/colvars/colvarscript.h b/lib/colvars/colvarscript.h
index 94d451809cdd9feb853dbf5462342a52140d907a..39cd08934059f25e0fe598dc0a915ec79036631a 100644
--- a/lib/colvars/colvarscript.h
+++ b/lib/colvars/colvarscript.h
@@ -8,21 +8,27 @@
 // Colvars repository at GitHub.
 
 #ifndef COLVARSCRIPT_H
-#define COLVARSCRIPT_H
+//#define COLVARSCRIPT_H // Delay definition until later
 
 #include <string>
+#include <vector>
+#include <map>
+
 #include "colvarmodule.h"
 #include "colvarvalue.h"
 #include "colvarbias.h"
 #include "colvarproxy.h"
 
+
 // Only these error values are part of the scripting interface
 #define COLVARSCRIPT_ERROR -1
 #define COLVARSCRIPT_OK 0
 
+
 class colvarscript  {
 
 private:
+
   colvarproxy *proxy;
   colvarmodule *colvars;
 
@@ -35,16 +41,93 @@ public:
   colvarscript(colvarproxy * p);
   inline ~colvarscript() {}
 
-  /// If an error is caught by the proxy through fatal_error(), this is set to COLVARSCRIPT_ERROR
+  /// If an error is caught by the proxy through fatal_error(), this is set to
+  /// COLVARSCRIPT_ERROR
   int proxy_error;
 
-  /// If an error is returned by one of the methods, it should set this to the error message
+  /// If an error is returned by one of the methods, it should set this to the
+  /// error message
   std::string result;
 
   /// Run script command with given positional arguments (objects)
   int run(int objc, unsigned char *const objv[]);
 
+  /// Set the return value of the script command to the given string
+  inline void set_str_result(std::string const &s)
+  {
+    result = s;
+  }
+
+  /// Build and return a short help
+  std::string help_string(void) const;
+
+  /// Use scripting language to get the string representation of an object
+  inline char const *obj_to_str(unsigned char *const obj)
+  {
+    return cvm::proxy->script_obj_to_str(obj);
+  }
+
+  enum command {
+    cv_help,
+    cv_version,
+    cv_config,
+    cv_configfile,
+    cv_reset,
+    cv_delete,
+    cv_list,
+    cv_list_biases,
+    cv_load,
+    cv_save,
+    cv_update,
+    cv_addenergy,
+    cv_getenergy,
+    cv_printframe,
+    cv_printframelabels,
+    cv_frame,
+    cv_colvar,
+    cv_colvar_value,
+    cv_colvar_update,
+    cv_colvar_type,
+    cv_colvar_delete,
+    cv_colvar_addforce,
+    cv_colvar_getappliedforce,
+    cv_colvar_gettotalforce,
+    cv_colvar_cvcflags,
+    cv_colvar_getconfig,
+    cv_colvar_get,
+    cv_colvar_set,
+    cv_bias,
+    cv_bias_energy,
+    cv_bias_update,
+    cv_bias_delete,
+    cv_bias_getconfig,
+    cv_bias_get,
+    cv_bias_set,
+    cv_n_commands
+  };
+
+  /// Execute a script command; sets an error result and returns COLVARSCRIPT_ERROR
+  /// (instead of calling a null pointer) if no implementation is registered for c
+  inline int exec_command(command c, void *pobj,
+                          int objc, unsigned char * const *objv) {
+    if (comm_fns[c] == NULL) { result = "Error: command not implemented"; return COLVARSCRIPT_ERROR; }
+    return (*(comm_fns[c]))(pobj, objc, objv);
+  }
+
+  /// Get help for a command (TODO reformat for each language?)
+  inline std::string command_help(colvarscript::command c) const
+  {
+    return comm_help[c];
+  }
+
+  /// Clear all object results
+  inline void clear_results()
+  {
+    result.clear();
+  }
+
 private:
+
   /// Run subcommands on colvar
   int proc_colvar(colvar *cv, int argc, unsigned char *const argv[]);
 
@@ -55,17 +138,146 @@ private:
   int proc_features(colvardeps *obj,
                     int argc, unsigned char *const argv[]);
 
-  /// Build and return a short help
-  std::string help_string(void);
+  /// Internal identifiers of command strings
+  std::map<std::string, command> comm_str_map;
 
-public:
+  /// Help strings for each command
+  std::vector<std::string> comm_help;
 
-  inline char const *obj_to_str(unsigned char *const obj)
-  {
-    return cvm::proxy->script_obj_to_str(obj);
-  }
+  /// Number of arguments for each command
+  std::vector<size_t> comm_n_args;
+
+  /// Arguments for each command
+  std::vector< std::vector<std::string> > comm_args;
+
+  /// Implementations of each command
+  std::vector<int (*)(void *, int, unsigned char * const *)> comm_fns;
 
 };
 
 
+/// Get a pointer to the main colvarscript object
+inline static colvarscript *colvarscript_obj()
+{
+  return cvm::main()->proxy->script;
+}
+
+/// Get a pointer to the colvar object pointed to by pobj
+inline static colvar *colvar_obj(void *pobj)
+{
+  return reinterpret_cast<colvar *>(pobj);
+}
+
+/// Get a pointer to the colvarbias object pointed to by pobj
+inline static colvarbias *colvarbias_obj(void *pobj)
+{
+  return reinterpret_cast<colvarbias *>(pobj);
+}
+
+
+#define CVSCRIPT_COMM_FNAME(COMM) cvscript_ ## COMM
+
+#define CVSCRIPT_COMM_PROTO(COMM)                                       \
+  int CVSCRIPT_COMM_FNAME(COMM)(void *, int, unsigned char *const *);
+
+#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY)  \
+  CVSCRIPT_COMM_PROTO(COMM)
+
+#undef COLVARSCRIPT_H
+#endif // #ifndef COLVARSCRIPT_H
+
+
+#ifdef COLVARSCRIPT_CPP
+#define CVSCRIPT_COMM_FN(COMM,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY)       \
+  int CVSCRIPT_COMM_FNAME(COMM)(void *pobj,                             \
+                                int objc, unsigned char *const objv[])  \
+  {                                                                     \
+    colvarscript *script = colvarscript_obj();                          \
+    script->clear_results();                                            \
+    if (objc < 2+N_ARGS_MIN) /* "cv" and "COMM" are 1st and 2nd */ {    \
+      script->set_str_result("Missing arguments\n" +                    \
+                             script->command_help(colvarscript::COMM)); \
+      return COLVARSCRIPT_ERROR;                                        \
+    }                                                                   \
+    if (objc > 2+N_ARGS_MAX) {                                          \
+      script->set_str_result("Too many arguments\n" +                   \
+                             script->command_help(colvarscript::COMM)); \
+      return COLVARSCRIPT_ERROR;                                        \
+    }                                                                   \
+    FN_BODY;                                                            \
+  }
+#undef CVSCRIPT
+#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY) \
+  CVSCRIPT_COMM_FN(COMM,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY)
+#endif // #ifdef COLVARSCRIPT_CPP
+
+
+#ifdef COLVARSCRIPT_INIT_FN
+#define CVSCRIPT_COMM_INIT(COMM,HELP,ARGS) {                    \
+    comm_str_map[#COMM] = COMM;                                 \
+    comm_help[COMM] = HELP;                                     \
+    comm_fns[COMM] = &(CVSCRIPT_COMM_FNAME(COMM));              \
+  }
+#undef CVSCRIPT
+#define CVSCRIPT(COMM,HELP,N_ARGS_MIN,N_ARGS_MAX,ARGS,FN_BODY)  \
+  CVSCRIPT_COMM_INIT(COMM,HELP,ARGS)
 #endif
+
+
+#if !defined(COLVARSCRIPT_H) || defined(COLVARSCRIPT_INIT_FN)
+#define COLVARSCRIPT_H
+
+#ifndef COLVARSCRIPT_INIT_FN
+#ifdef __cplusplus
+extern "C" {
+#endif
+#endif
+
+  // Add optional arguments for command-specific help?
+  CVSCRIPT(cv_help,
+           "Print the help message",
+           0, 0,
+           {},
+           script->set_str_result(script->help_string());
+           return COLVARS_OK;
+           )
+
+  CVSCRIPT(cv_config,
+           "Read configuration from the given string",
+           1, 1,
+           { "conf (str) - Configuration string" },
+           std::string const conf(script->obj_to_str(objv[2]));
+           if (cvm::main()->read_config_string(conf) == COLVARS_OK) {
+             return COLVARS_OK;
+           }
+           script->set_str_result("Error parsing configuration string");
+           return COLVARSCRIPT_ERROR;
+           )
+
+  CVSCRIPT(cv_addenergy,
+           "Add an energy to the MD engine",
+           1, 1,
+           { "E (float) - Amount of energy to add" },
+           cvm::main()->total_bias_energy +=
+             strtod(script->obj_to_str(objv[2]), NULL);
+           return COLVARS_OK;
+           )
+
+  CVSCRIPT(cv_getenergy,
+           "Get the current Colvars energy",
+           1, 1,
+           { "E (float) - Store the energy in this variable" },
+           double *energy = reinterpret_cast<double *>(objv[2]); // NOTE(review): objv[2] is used as a raw double*, not via obj_to_str() - confirm callers
+           *energy = cvm::main()->total_bias_energy;
+           return COLVARS_OK;
+           )
+
+#ifndef COLVARSCRIPT_INIT_FN
+#ifdef __cplusplus
+} // extern "C"
+#endif // #ifdef __cplusplus
+#endif // #ifndef COLVARSCRIPT_INIT_FN
+
+#undef CVSCRIPT
+
+#endif // #if !defined(COLVARSCRIPT_H) || defined(COLVARSCRIPT_INIT_FN)
diff --git a/lib/colvars/colvartypes.cpp b/lib/colvars/colvartypes.cpp
index 428fe1a4b1d864c725cd7d5b09652216a03091a3..b604606d4628911138f2553215db9d7b3c306630 100644
--- a/lib/colvars/colvartypes.cpp
+++ b/lib/colvars/colvartypes.cpp
@@ -312,7 +312,7 @@ void colvarmodule::rotation::diagonalize_matrix(cvm::matrix2d<cvm::real> &S,
     cvm::real norm2 = 0.0;
     size_t i;
     for (i = 0; i < 4; i++) {
-      norm2 += std::pow(S_eigvec[ie][i], int(2));
+      norm2 += S_eigvec[ie][i] * S_eigvec[ie][i];
     }
     cvm::real const norm = std::sqrt(norm2);
     for (i = 0; i < 4; i++) {
diff --git a/lib/colvars/colvartypes.h b/lib/colvars/colvartypes.h
index fe3160eb4b3238ef036c2ea56d6b3954d73cb121..97257d18ad66d1da99b0b74586190e735687850b 100644
--- a/lib/colvars/colvartypes.h
+++ b/lib/colvars/colvartypes.h
@@ -705,7 +705,7 @@ public:
   {
     std::stringstream stream(s);
     size_t i = 0;
-    while ((stream >> data[i]) && (i < data.size())) {
+    while ((i < data.size()) && (stream >> data[i])) {
       i++;
     }
     if (i < data.size()) {
diff --git a/lib/colvars/lepton/include/Lepton.h b/lib/colvars/lepton/include/Lepton.h
new file mode 100644
index 0000000000000000000000000000000000000000..22edcb3ff9d43d2bdd7b1851fdf68099d4c1d195
--- /dev/null
+++ b/lib/colvars/lepton/include/Lepton.h
@@ -0,0 +1,43 @@
+#ifndef LEPTON_H_
+#define LEPTON_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/CompiledExpression.h"
+#include "lepton/CustomFunction.h"
+#include "lepton/ExpressionProgram.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include "lepton/Parser.h"
+
+#endif /*LEPTON_H_*/
diff --git a/lib/colvars/lepton/include/lepton/CompiledExpression.h b/lib/colvars/lepton/include/lepton/CompiledExpression.h
new file mode 100644
index 0000000000000000000000000000000000000000..67442e0cf5140a1cbcfd03945304c39ae0fff815
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/CompiledExpression.h
@@ -0,0 +1,113 @@
+#ifndef LEPTON_COMPILED_EXPRESSION_H_
+#define LEPTON_COMPILED_EXPRESSION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#ifdef LEPTON_USE_JIT
+    #include "asmjit.h"
+#endif
+
+namespace Lepton {
+
+class Operation;
+class ParsedExpression;
+
+/**
+ * A CompiledExpression is a highly optimized representation of an expression for cases when you want to evaluate
+ * it many times as quickly as possible.  You should treat it as an opaque object; none of the internal representation
+ * is visible.
+ * 
+ * A CompiledExpression is created by calling createCompiledExpression() on a ParsedExpression.
+ * 
+ * WARNING: CompiledExpression is NOT thread safe.  You should never access a CompiledExpression from two threads at
+ * the same time.
+ */
+
+class LEPTON_EXPORT CompiledExpression {
+public:
+    CompiledExpression();
+    CompiledExpression(const CompiledExpression& expression);
+    ~CompiledExpression();
+    CompiledExpression& operator=(const CompiledExpression& expression);
+    /**
+     * Get the names of all variables used by this expression.
+     */
+    const std::set<std::string>& getVariables() const;
+    /**
+     * Get a reference to the memory location where the value of a particular variable is stored.  This can be used
+     * to set the value of the variable before calling evaluate().
+     */
+    double& getVariableReference(const std::string& name);
+    /**
+     * You can optionally specify the memory locations from which the values of variables should be read.
+     * This is useful, for example, when several expressions all use the same variable.  You can then set
+     * the value of that variable in one place, and it will be seen by all of them.
+     */
+    void setVariableLocations(std::map<std::string, double*>& variableLocations);
+    /**
+     * Evaluate the expression.  The values of all variables should have been set before calling this.
+     */
+    double evaluate() const;
+private:
+    friend class ParsedExpression;
+    CompiledExpression(const ParsedExpression& expression);
+    void compileExpression(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
+    int findTempIndex(const ExpressionTreeNode& node, std::vector<std::pair<ExpressionTreeNode, int> >& temps);
+    std::map<std::string, double*> variablePointers;
+    std::vector<std::pair<double*, double*> > variablesToCopy;
+    std::vector<std::vector<int> > arguments;
+    std::vector<int> target;
+    std::vector<Operation*> operation;
+    std::map<std::string, int> variableIndices;
+    std::set<std::string> variableNames;
+    mutable std::vector<double> workspace;
+    mutable std::vector<double> argValues;
+    std::map<std::string, double> dummyVariables;
+    void* jitCode;
+#ifdef LEPTON_USE_JIT
+    void generateJitCode();
+    void generateSingleArgCall(asmjit::X86Compiler& c, asmjit::X86XmmVar& dest, asmjit::X86XmmVar& arg, double (*function)(double));
+    std::vector<double> constants;
+    asmjit::JitRuntime runtime;
+#endif
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_COMPILED_EXPRESSION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/CustomFunction.h b/lib/colvars/lepton/include/lepton/CustomFunction.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c5586105f7f20d5ca218b6d5f12b2ad34d19629
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/CustomFunction.h
@@ -0,0 +1,77 @@
+#ifndef LEPTON_CUSTOM_FUNCTION_H_
+#define LEPTON_CUSTOM_FUNCTION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+
+namespace Lepton {
+
+/**
+ * This class is the interface for defining your own function that may be included in expressions.
+ * To use it, create a concrete subclass that implements all of the virtual methods for each new function
+ * you want to define.  Then when you call Parser::parse() to parse an expression, pass a map of
+ * function names to CustomFunction objects.
+ */
+
+class LEPTON_EXPORT CustomFunction {
+public:
+    virtual ~CustomFunction() {
+    }
+    /**
+     * Get the number of arguments this function expects.
+     */
+    virtual int getNumArguments() const = 0;
+    /**
+     * Evaluate the function.
+     *
+     * @param arguments    the array of argument values
+     */
+    virtual double evaluate(const double* arguments) const = 0;
+    /**
+     * Evaluate a derivative of the function.
+     *
+     * @param arguments    the array of argument values
+     * @param derivOrder   an array specifying the number of times the function has been differentiated
+     *                     with respect to each of its arguments.  For example, the array {0, 2} indicates
+     *                     a second derivative with respect to the second argument.
+     */
+    virtual double evaluateDerivative(const double* arguments, const int* derivOrder) const = 0;
+    /**
+     * Create a new duplicate of this object on the heap using the "new" operator.
+     */
+    virtual CustomFunction* clone() const = 0;
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_CUSTOM_FUNCTION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Exception.h b/lib/colvars/lepton/include/lepton/Exception.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ad55714d183e843176d62315ef54bde7872497e
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/Exception.h
@@ -0,0 +1,59 @@
+#ifndef LEPTON_EXCEPTION_H_
+#define LEPTON_EXCEPTION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include <exception>
+#include <string>
+
+namespace Lepton {
+
+/**
+ * This class is used for all exceptions thrown by Lepton.
+ */
+
+class Exception : public std::exception {
+public:
+    Exception(const std::string& message) : message(message) {
+    }
+    ~Exception() throw() {
+    }
+    const char* what() const throw() {
+        return message.c_str();
+    }
+private:
+    std::string message;
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_EXCEPTION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ExpressionProgram.h b/lib/colvars/lepton/include/lepton/ExpressionProgram.h
new file mode 100644
index 0000000000000000000000000000000000000000..94d37f471d08bb7b2041aa3c42aafc6a3bc5bac0
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/ExpressionProgram.h
@@ -0,0 +1,95 @@
+#ifndef LEPTON_EXPRESSION_PROGRAM_H_
+#define LEPTON_EXPRESSION_PROGRAM_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace Lepton {
+
+class ParsedExpression;
+
+/**
+ * An ExpressionProgram is a linear sequence of Operations for evaluating an expression.  The evaluation
+ * is done with a stack.  The arguments to each Operation are first taken off the stack in order, then it is
+ * evaluated and the result is pushed back onto the stack.  At the end, the stack contains a single value,
+ * which is the value of the expression.
+ *
+ * An ExpressionProgram is created by calling createProgram() on a ParsedExpression.
+ */
+
+class LEPTON_EXPORT ExpressionProgram {
+public:
+    // Defined out of line. NOTE(review): operations holds raw pointers, so copies presumably clone each Operation - confirm.
+    ExpressionProgram();
+    ExpressionProgram(const ExpressionProgram& program);
+    ~ExpressionProgram();
+    ExpressionProgram& operator=(const ExpressionProgram& program);
+    /** Get the number of Operations that make up this program. */
+    int getNumOperations() const;
+    /**
+     * Get an Operation in this program.
+     *
+     * @param index    position of the requested Operation (NOTE(review): presumably zero-based and below getNumOperations() - confirm)
+     */
+    const Operation& getOperation(int index) const;
+    /**
+     * Get the size of the stack needed to execute this program, i.e. the largest number of
+     * elements present on the stack at any point during evaluation.
+     */
+    int getStackSize() const;
+    /** Evaluate an expression without variables; if the expression involves any, an exception is thrown. */
+    double evaluate() const;
+    /**
+     * Evaluate the expression.
+     *
+     * @param variables    the values of all variables that appear in the expression; an exception is
+     *                     thrown if a variable used by the expression is missing from this map
+     */
+    double evaluate(const std::map<std::string, double>& variables) const;
+private:
+    // The converting constructor is private; ParsedExpression is a friend so that it can create programs.
+    friend class ParsedExpression;
+    ExpressionProgram(const ParsedExpression& expression);
+    void buildProgram(const ExpressionTreeNode& node);
+    // Pointers to the Operations of the program (NOTE(review): presumably owned and freed by this object - confirm).
+    std::vector<Operation*> operations;
+    int maxArgs, stackSize;
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_EXPRESSION_PROGRAM_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h b/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf3a9a0902aae5118c0e76b619347e851c117879
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/ExpressionTreeNode.h
@@ -0,0 +1,105 @@
+#ifndef LEPTON_EXPRESSION_TREE_NODE_H_
+#define LEPTON_EXPRESSION_TREE_NODE_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include <string>
+#include <vector>
+
+namespace Lepton {
+
+class Operation;
+
+/**
+ * This class represents a node in the abstract syntax tree representation of an expression.
+ * Each node is defined by an Operation and a set of children.  When the expression is
+ * evaluated, each child is first evaluated in order, then the resulting values are passed
+ * as the arguments to the Operation's evaluate() method.
+ */
+
+class LEPTON_EXPORT ExpressionTreeNode {
+public:
+    /**
+     * Create a new ExpressionTreeNode.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param children     the children of this node
+     */
+    ExpressionTreeNode(Operation* operation, const std::vector<ExpressionTreeNode>& children);
+    /**
+     * Create a new ExpressionTreeNode with two children.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param child1       the first child of this node
+     * @param child2       the second child of this node
+     */
+    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2);
+    /**
+     * Create a new ExpressionTreeNode with one child.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     * @param child        the child of this node
+     */
+    ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child);
+    /**
+     * Create a new ExpressionTreeNode with no children.
+     *
+     * @param operation    the operation for this node.  The ExpressionTreeNode takes over ownership
+     *                     of this object, and deletes it when the node is itself deleted.
+     */
+    ExpressionTreeNode(Operation* operation);
+    /** Copy constructor (NOTE(review): presumably clones the Operation so each node owns its own - confirm in the .cpp). */
+    ExpressionTreeNode(const ExpressionTreeNode& node);
+    /** Default constructor (NOTE(review): presumably leaves the node without an Operation - confirm before calling getOperation()). */
+    ExpressionTreeNode();
+    ~ExpressionTreeNode();
+    /** Comparison and assignment operators; their exact semantics are defined out of line. */
+    bool operator==(const ExpressionTreeNode& node) const;
+    bool operator!=(const ExpressionTreeNode& node) const;
+    ExpressionTreeNode& operator=(const ExpressionTreeNode& node);
+    /** Get the Operation performed by this node. */
+    const Operation& getOperation() const;
+    /** Get this node's child nodes. */
+    const std::vector<ExpressionTreeNode>& getChildren() const;
+private:
+    /** The Operation of this node; per the constructor documentation it is owned by the node. */
+    Operation* operation;
+    std::vector<ExpressionTreeNode> children;
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_EXPRESSION_TREE_NODE_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Operation.h b/lib/colvars/lepton/include/lepton/Operation.h
new file mode 100644
index 0000000000000000000000000000000000000000..f7a8b78163403eea136b92c48a35d7b69e97a419
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/Operation.h
@@ -0,0 +1,1165 @@
+#ifndef LEPTON_OPERATION_H_
+#define LEPTON_OPERATION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include "CustomFunction.h"
+#include "Exception.h"
+#include <cmath>
+#include <map>
+#include <string>
+#include <vector>
+#include <sstream>
+#include <algorithm>
+
+namespace Lepton {
+
+class ExpressionTreeNode;
+
+/**
+ * An Operation represents a single step in the evaluation of an expression, such as a function,
+ * an operator, or a constant value.  Each Operation takes some number of values as arguments
+ * and produces a single value.
+ *
+ * This is an abstract class with subclasses for specific operations.
+ */
+
+class LEPTON_EXPORT Operation {
+public:
+    /** Virtual, so deleting a subclass object through an Operation pointer runs the subclass destructor. */
+    virtual ~Operation() {}
+    /**
+     * Identifiers for all Operation subclasses.  They are provided so that switch statements
+     * can be used when processing or analyzing parsed expressions.
+     */
+    enum Id {CONSTANT, VARIABLE, CUSTOM, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, NEGATE, SQRT, EXP, LOG,
+             SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, SINH, COSH, TANH, ERF, ERFC, STEP, DELTA, SQUARE, CUBE, RECIPROCAL,
+             ADD_CONSTANT, MULTIPLY_CONSTANT, POWER_CONSTANT, MIN, MAX, ABS, FLOOR, CEIL, SELECT};
+    /** Get the name of this Operation. */
+    virtual std::string getName() const = 0;
+    /** Get this Operation's ID. */
+    virtual Id getId() const = 0;
+    /** Get the number of arguments this operation expects. */
+    virtual int getNumArguments() const = 0;
+    /** Create a clone of this Operation. */
+    virtual Operation* clone() const = 0;
+    /**
+     * Perform the computation represented by this operation.
+     *
+     * @param args        the array of arguments
+     * @param variables   a map containing the values of all variables
+     * @return the result of performing the computation.
+     */
+    virtual double evaluate(double* args, const std::map<std::string, double>& variables) const = 0;
+    /**
+     * Return an ExpressionTreeNode which represents the analytic derivative of this Operation with respect to a variable.
+     *
+     * @param children     the child nodes
+     * @param childDerivs  the derivatives of the child nodes with respect to the variable
+     * @param variable     the variable with respect to which the derivative should be taken
+     */
+    virtual ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const = 0;
+    /**
+     * Get whether this operation should be displayed with infix notation.
+     * The default answer is false.
+     */
+    virtual bool isInfixOperator() const { return false; }
+    /**
+     * Get whether this is a symmetric binary operation, such that exchanging its arguments
+     * does not affect the result.  The default answer is false.
+     */
+    virtual bool isSymmetric() const { return false; }
+    /**
+     * Inequality test.  By default two Operations differ exactly when their IDs differ;
+     * subclasses that carry extra state (e.g. Constant, Variable, Custom) override this.
+     */
+    virtual bool operator!=(const Operation& op) const {
+        const bool sameId = (getId() == op.getId());
+        return !sameId;
+    }
+    /**
+     * Equality test, defined as the negation of the virtual operator!= so that
+     * subclass overrides of operator!= are honoured here as well.
+     */
+    virtual bool operator==(const Operation& op) const {
+        const bool differs = (*this != op);
+        return !differs;
+    }
+    // Forward declarations of the concrete Operation subclasses, one per Id value.
+    class Constant;
+    class Variable;
+    class Custom;
+    class Add;
+    class Subtract;
+    class Multiply;
+    class Divide;
+    class Power;
+    class Negate;
+    class Sqrt;
+    class Exp;
+    class Log;
+    class Sin;
+    class Cos;
+    class Sec;
+    class Csc;
+    class Tan;
+    class Cot;
+    class Asin;
+    class Acos;
+    class Atan;
+    class Sinh;
+    class Cosh;
+    class Tanh;
+    class Erf;
+    class Erfc;
+    class Step;
+    class Delta;
+    class Square;
+    class Cube;
+    class Reciprocal;
+    class AddConstant;
+    class MultiplyConstant;
+    class PowerConstant;
+    class Min;
+    class Max;
+    class Abs;
+    class Floor;
+    class Ceil;
+    class Select;
+};
+
+/** Leaf operation holding a fixed numeric value; it takes no arguments. */
+class LEPTON_EXPORT Operation::Constant : public Operation {
+public:
+    Constant(double value) : value(value) {}
+    /** The name is the value as formatted by a default-configured string stream. */
+    std::string getName() const {
+        std::ostringstream text;
+        text << value;
+        return text.str();
+    }
+    Id getId() const { return CONSTANT; }
+    int getNumArguments() const { return 0; }
+    Operation* clone() const { return new Constant(value); }
+    /** Return the stored value; neither args nor variables is read. */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return value;
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Get the numeric value of this constant. */
+    double getValue() const { return value; }
+    /**
+     * Differs unless op is also a Constant holding an equal value.
+     */
+    bool operator!=(const Operation& op) const {
+        const Constant* other = dynamic_cast<const Constant*>(&op);
+        if (other == NULL)
+            return true;
+        return other->value != value;
+    }
+private:
+    double value;
+};
+
+/** Leaf operation that looks up a named variable when it is evaluated; it takes no arguments. */
+class LEPTON_EXPORT Operation::Variable : public Operation {
+public:
+    Variable(const std::string& name) : name(name) {}
+    std::string getName() const { return name; }
+    Id getId() const { return VARIABLE; }
+    int getNumArguments() const { return 0; }
+    Operation* clone() const { return new Variable(name); }
+    /**
+     * Return the value stored in the map under this variable's name; args is not read.
+     *
+     * @throws Exception if the map has no entry for this variable's name
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        std::map<std::string, double>::const_iterator entry = variables.find(name);
+        if (entry != variables.end())
+            return entry->second;
+        throw Exception("No value specified for variable "+name);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Differs unless op is also a Variable with the same name. */
+    bool operator!=(const Operation& op) const {
+        const Variable* other = dynamic_cast<const Variable*>(&op);
+        if (other == NULL)
+            return true;
+        return other->name != name;
+    }
+private:
+    std::string name;
+};
+
+/** Operation that evaluates a user-supplied CustomFunction, or one of its partial derivatives. */
+class LEPTON_EXPORT Operation::Custom : public Operation {
+public:
+    /** Takes ownership of function: the destructor deletes it. */
+    Custom(const std::string& name, CustomFunction* function) : name(name), function(function), isDerivative(false), derivOrder(function->getNumArguments(), 0) {}
+    /** Build the derivative of base with respect to argument derivIndex; base's function is cloned, not shared. */
+    Custom(const Custom& base, int derivIndex) : name(base.name), function(base.function->clone()), isDerivative(true), derivOrder(base.derivOrder) {
+        derivOrder[derivIndex]++;
+    }
+    /** Deep copy: the function is cloned so that two objects never delete the same pointer. */
+    Custom(const Custom& other) : Operation(other), name(other.name), function(other.function->clone()), isDerivative(other.isDerivative), derivOrder(other.derivOrder) {}
+    /** Deep-copying assignment via a temporary; the previous function is deleted together with that temporary. */
+    Custom& operator=(const Custom& other) {
+        Custom temp(other);
+        name.swap(temp.name);
+        std::swap(function, temp.function);
+        std::swap(isDerivative, temp.isDerivative);
+        derivOrder.swap(temp.derivOrder);
+        return *this;
+    }
+    ~Custom() { delete function; }
+    std::string getName() const { return name; }
+    Id getId() const { return CUSTOM; }
+    int getNumArguments() const { return function->getNumArguments(); }
+    /** Return a newly allocated deep copy (the function is cloned as well). */
+    Operation* clone() const { return new Custom(*this); }
+    /** Evaluate the function itself, or the derivative selected by derivOrder when this object is a derivative. */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return isDerivative ? function->evaluateDerivative(args, &derivOrder[0]) : function->evaluate(args);
+    }
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Per-argument derivative orders; all zero for an object built from the two-argument constructor. */
+    const std::vector<int>& getDerivOrder() const { return derivOrder; }
+    /** Differs unless op is also a Custom with the same name, derivative flag and derivative orders. */
+    bool operator!=(const Operation& op) const {
+        const Custom* o = dynamic_cast<const Custom*>(&op);
+        return (o == NULL || o->name != name || o->isDerivative != isDerivative || o->derivOrder != derivOrder);
+    }
+private:
+    std::string name;
+    CustomFunction* function;
+    bool isDerivative;
+    std::vector<int> derivOrder;
+};
+
+/** Binary operator "+": the sum of its two arguments. */
+class LEPTON_EXPORT Operation::Add : public Operation {
+public:
+    Add() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "+"; }
+    Id getId() const { return ADD; }
+    /** Exactly two arguments are expected. */
+    int getNumArguments() const { return 2; }
+    /** Return a newly allocated Add. */
+    Operation* clone() const { return new Add(); }
+    /**
+     * Return args[0] + args[1].
+     * The variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double lhs = args[0];
+        const double rhs = args[1];
+        return lhs+rhs;
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Displayed with infix notation. */
+    bool isInfixOperator() const { return true; }
+    /** Exchanging the two arguments does not affect the result. */
+    bool isSymmetric() const { return true; }
+};
+
+/** Binary operator "-": the first argument minus the second. */
+class LEPTON_EXPORT Operation::Subtract : public Operation {
+public:
+    Subtract() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "-"; }
+    Id getId() const { return SUBTRACT; }
+    /** Exactly two arguments are expected. */
+    int getNumArguments() const { return 2; }
+    /** Return a newly allocated Subtract. */
+    Operation* clone() const { return new Subtract(); }
+    /**
+     * Return args[0] - args[1]; the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double lhs = args[0];
+        const double rhs = args[1];
+        return lhs-rhs;
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Displayed with infix notation. */
+    bool isInfixOperator() const { return true; }
+};
+
+/** Binary operator "*": the product of its two arguments. */
+class LEPTON_EXPORT Operation::Multiply : public Operation {
+public:
+    Multiply() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "*"; }
+    Id getId() const { return MULTIPLY; }
+    /** Exactly two arguments are expected. */
+    int getNumArguments() const { return 2; }
+    /** Return a newly allocated Multiply. */
+    Operation* clone() const { return new Multiply(); }
+    /**
+     * Return args[0] * args[1].
+     * The variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double lhs = args[0];
+        const double rhs = args[1];
+        return lhs*rhs;
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Displayed with infix notation. */
+    bool isInfixOperator() const { return true; }
+    /** Exchanging the two arguments does not affect the result. */
+    bool isSymmetric() const { return true; }
+};
+
+/** Binary operator "/": the first argument divided by the second. */
+class LEPTON_EXPORT Operation::Divide : public Operation {
+public:
+    Divide() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "/"; }
+    Id getId() const { return DIVIDE; }
+    /** Exactly two arguments are expected. */
+    int getNumArguments() const { return 2; }
+    /** Return a newly allocated Divide. */
+    Operation* clone() const { return new Divide(); }
+    /**
+     * Return args[0] / args[1]; the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double numerator = args[0];
+        const double denominator = args[1];
+        return numerator/denominator;
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Displayed with infix notation. */
+    bool isInfixOperator() const { return true; }
+};
+
+/** Binary operator "^": the first argument raised to the power of the second. */
+class LEPTON_EXPORT Operation::Power : public Operation {
+public:
+    Power() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "^"; }
+    Id getId() const { return POWER; }
+    /** Exactly two arguments are expected. */
+    int getNumArguments() const { return 2; }
+    /** Return a newly allocated Power. */
+    Operation* clone() const { return new Power(); }
+    /**
+     * Return std::pow(args[0], args[1]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double base = args[0];
+        const double exponent = args[1];
+        return std::pow(base, exponent);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    /** Displayed with infix notation. */
+    bool isInfixOperator() const { return true; }
+};
+
+/** Unary minus: the negation of its single argument. */
+class LEPTON_EXPORT Operation::Negate : public Operation {
+public:
+    Negate() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "-"; }
+    Id getId() const { return NEGATE; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Negate. */
+    Operation* clone() const { return new Negate(); }
+    /**
+     * Return -args[0]; the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return -x;
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "sqrt": the square root of its single argument. */
+class LEPTON_EXPORT Operation::Sqrt : public Operation {
+public:
+    Sqrt() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "sqrt"; }
+    Id getId() const { return SQRT; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Sqrt. */
+    Operation* clone() const { return new Sqrt(); }
+    /**
+     * Return std::sqrt(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::sqrt(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "exp": e raised to the power of its single argument. */
+class LEPTON_EXPORT Operation::Exp : public Operation {
+public:
+    Exp() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "exp"; }
+    Id getId() const { return EXP; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Exp. */
+    Operation* clone() const { return new Exp(); }
+    /**
+     * Return std::exp(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::exp(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "log": the natural logarithm (std::log) of its single argument. */
+class LEPTON_EXPORT Operation::Log : public Operation {
+public:
+    Log() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "log"; }
+    Id getId() const { return LOG; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Log. */
+    Operation* clone() const { return new Log(); }
+    /**
+     * Return std::log(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::log(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "sin": the sine of its single argument. */
+class LEPTON_EXPORT Operation::Sin : public Operation {
+public:
+    Sin() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "sin"; }
+    Id getId() const { return SIN; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Sin. */
+    Operation* clone() const { return new Sin(); }
+    /**
+     * Return std::sin(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::sin(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "cos": the cosine of its single argument. */
+class LEPTON_EXPORT Operation::Cos : public Operation {
+public:
+    Cos() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "cos"; }
+    Id getId() const { return COS; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Cos. */
+    Operation* clone() const { return new Cos(); }
+    /**
+     * Return std::cos(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::cos(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "sec": the reciprocal of the cosine of its single argument. */
+class LEPTON_EXPORT Operation::Sec : public Operation {
+public:
+    Sec() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "sec"; }
+    Id getId() const { return SEC; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Sec. */
+    Operation* clone() const { return new Sec(); }
+    /**
+     * Return 1.0/std::cos(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return 1.0/std::cos(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "csc": the reciprocal of the sine of its single argument. */
+class LEPTON_EXPORT Operation::Csc : public Operation {
+public:
+    Csc() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "csc"; }
+    Id getId() const { return CSC; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Csc. */
+    Operation* clone() const { return new Csc(); }
+    /**
+     * Return 1.0/std::sin(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return 1.0/std::sin(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "tan": the tangent of its single argument. */
+class LEPTON_EXPORT Operation::Tan : public Operation {
+public:
+    Tan() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "tan"; }
+    Id getId() const { return TAN; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Tan. */
+    Operation* clone() const { return new Tan(); }
+    /**
+     * Return std::tan(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::tan(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "cot": the reciprocal of the tangent of its single argument. */
+class LEPTON_EXPORT Operation::Cot : public Operation {
+public:
+    Cot() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "cot"; }
+    Id getId() const { return COT; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Cot. */
+    Operation* clone() const { return new Cot(); }
+    /**
+     * Return 1.0/std::tan(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return 1.0/std::tan(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "asin": the arc sine of its single argument. */
+class LEPTON_EXPORT Operation::Asin : public Operation {
+public:
+    Asin() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "asin"; }
+    Id getId() const { return ASIN; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Asin. */
+    Operation* clone() const { return new Asin(); }
+    /**
+     * Return std::asin(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::asin(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+/** Unary function "acos": the arc cosine of its single argument. */
+class LEPTON_EXPORT Operation::Acos : public Operation {
+public:
+    Acos() {}
+    /** Name reported for this operation. */
+    std::string getName() const { return "acos"; }
+    Id getId() const { return ACOS; }
+    /** Exactly one argument is expected. */
+    int getNumArguments() const { return 1; }
+    /** Return a newly allocated Acos. */
+    Operation* clone() const { return new Acos(); }
+    /**
+     * Return std::acos(args[0]); the variables map is not consulted.
+     */
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::acos(x);
+    }
+    /** Analytic derivative of this operation; defined out of line. */
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "atan": applies std::atan (the standard library arctangent)
+// to its single argument.
+class LEPTON_EXPORT Operation::Atan : public Operation {
+public:
+    Atan() {}
+    // Name of this operation.
+    std::string getName() const { return "atan"; }
+    // Id constant identifying this operation.
+    Id getId() const { return ATAN; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Atan.
+    Operation* clone() const { return new Atan(); }
+    // Compute std::atan(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::atan(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "sinh": applies std::sinh (the standard library hyperbolic
+// sine) to its single argument.
+class LEPTON_EXPORT Operation::Sinh : public Operation {
+public:
+    Sinh() {}
+    // Name of this operation.
+    std::string getName() const { return "sinh"; }
+    // Id constant identifying this operation.
+    Id getId() const { return SINH; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Sinh.
+    Operation* clone() const { return new Sinh(); }
+    // Compute std::sinh(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::sinh(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "cosh": applies std::cosh (the standard library hyperbolic
+// cosine) to its single argument.
+class LEPTON_EXPORT Operation::Cosh : public Operation {
+public:
+    Cosh() {}
+    // Name of this operation.
+    std::string getName() const { return "cosh"; }
+    // Id constant identifying this operation.
+    Id getId() const { return COSH; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Cosh.
+    Operation* clone() const { return new Cosh(); }
+    // Compute std::cosh(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::cosh(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "tanh": applies std::tanh (the standard library hyperbolic
+// tangent) to its single argument.
+class LEPTON_EXPORT Operation::Tanh : public Operation {
+public:
+    Tanh() {}
+    // Name of this operation.
+    std::string getName() const { return "tanh"; }
+    // Id constant identifying this operation.
+    Id getId() const { return TANH; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Tanh.
+    Operation* clone() const { return new Tanh(); }
+    // Compute std::tanh(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::tanh(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation named "erf". Both evaluate() and differentiate() are only
+// declared in this class body, so the numerical definition is not visible here
+// (presumably the error function -- confirm against the implementation file).
+class LEPTON_EXPORT Operation::Erf : public Operation {
+public:
+    Erf() {}
+    // Name of this operation.
+    std::string getName() const { return "erf"; }
+    // Id constant identifying this operation.
+    Id getId() const { return ERF; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Erf.
+    Operation* clone() const { return new Erf(); }
+    // Numerical evaluation; only declared in this class body.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const;
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation named "erfc". Both evaluate() and differentiate() are only
+// declared in this class body, so the numerical definition is not visible here
+// (presumably the complementary error function -- confirm in the implementation).
+class LEPTON_EXPORT Operation::Erfc : public Operation {
+public:
+    Erfc() {}
+    // Name of this operation.
+    std::string getName() const { return "erfc"; }
+    // Id constant identifying this operation.
+    Id getId() const { return ERFC; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Erfc.
+    Operation* clone() const { return new Erfc(); }
+    // Numerical evaluation; only declared in this class body.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const;
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "step": 1.0 when the argument is >= 0.0, otherwise 0.0.
+class LEPTON_EXPORT Operation::Step : public Operation {
+public:
+    Step() {}
+    // Name of this operation.
+    std::string getName() const { return "step"; }
+    // Id constant identifying this operation.
+    Id getId() const { return STEP; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Step.
+    Operation* clone() const { return new Step(); }
+    // Test args[0] against zero; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        if (args[0] >= 0.0)
+            return 1.0;
+        return 0.0;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "delta": 1.0 when the argument equals 0.0, otherwise 0.0.
+class LEPTON_EXPORT Operation::Delta : public Operation {
+public:
+    Delta() {}
+    // Name of this operation.
+    std::string getName() const { return "delta"; }
+    // Id constant identifying this operation.
+    Id getId() const { return DELTA; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Delta.
+    Operation* clone() const { return new Delta(); }
+    // Exact comparison of args[0] with zero; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        if (args[0] == 0.0)
+            return 1.0;
+        return 0.0;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "square": multiplies its single argument by itself
+// (no call into the math library).
+class LEPTON_EXPORT Operation::Square : public Operation {
+public:
+    Square() {}
+    // Name of this operation.
+    std::string getName() const { return "square"; }
+    // Id constant identifying this operation.
+    Id getId() const { return SQUARE; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Square.
+    Operation* clone() const { return new Square(); }
+    // Compute args[0]*args[0]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return x*x;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "cube": multiplies its single argument by itself twice
+// (no call into the math library).
+class LEPTON_EXPORT Operation::Cube : public Operation {
+public:
+    Cube() {}
+    // Name of this operation.
+    std::string getName() const { return "cube"; }
+    // Id constant identifying this operation.
+    Id getId() const { return CUBE; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Cube.
+    Operation* clone() const { return new Cube(); }
+    // Compute args[0]*args[0]*args[0]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return x*x*x;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "recip": divides 1.0 by its single argument. No check for
+// a zero argument is made here.
+class LEPTON_EXPORT Operation::Reciprocal : public Operation {
+public:
+    Reciprocal() {}
+    // Name of this operation.
+    std::string getName() const { return "recip"; }
+    // Id constant identifying this operation.
+    Id getId() const { return RECIPROCAL; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Reciprocal.
+    Operation* clone() const { return new Reciprocal(); }
+    // Compute 1.0/args[0]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return 1.0/x;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation that adds a constant, fixed at construction, to its argument.
+class LEPTON_EXPORT Operation::AddConstant : public Operation {
+public:
+    AddConstant(double value) : value(value) {}
+    // Name is the constant followed by "+", formatted through a stringstream.
+    std::string getName() const {
+        std::stringstream label;
+        label << value << "+";
+        return label.str();
+    }
+    Id getId() const { return ADD_CONSTANT; }     // Id constant identifying this operation
+    int getNumArguments() const { return 1; }     // takes one argument
+    // Return a newly allocated AddConstant carrying the same constant.
+    Operation* clone() const { return new AddConstant(value); }
+    // Compute args[0] plus the stored constant; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]+value;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    // The constant that is added.
+    double getValue() const { return value; }
+    // True when op is not an AddConstant or carries a different constant.
+    bool operator!=(const Operation& op) const {
+        const AddConstant* other = dynamic_cast<const AddConstant*>(&op);
+        if (other == NULL)
+            return true;
+        return other->value != value;
+    }
+private:
+    double value;
+};
+
+// Unary operation that multiplies its argument by a constant fixed at construction.
+class LEPTON_EXPORT Operation::MultiplyConstant : public Operation {
+public:
+    MultiplyConstant(double value) : value(value) {}
+    // Name is the constant followed by "*", formatted through a stringstream.
+    std::string getName() const {
+        std::stringstream label;
+        label << value << "*";
+        return label.str();
+    }
+    Id getId() const { return MULTIPLY_CONSTANT; }  // Id constant identifying this operation
+    int getNumArguments() const { return 1; }       // takes one argument
+    // Return a newly allocated MultiplyConstant carrying the same constant.
+    Operation* clone() const { return new MultiplyConstant(value); }
+    // Compute args[0] times the stored constant; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        return args[0]*value;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    // The constant multiplier.
+    double getValue() const { return value; }
+    // True when op is not a MultiplyConstant or carries a different constant.
+    bool operator!=(const Operation& op) const {
+        const MultiplyConstant* other = dynamic_cast<const MultiplyConstant*>(&op);
+        if (other == NULL)
+            return true;
+        return other->value != value;
+    }
+private:
+    double value;
+};
+
+// Unary operation raising its argument to a fixed exponent; integer exponents use a fast path.
+class LEPTON_EXPORT Operation::PowerConstant : public Operation {
+public:
+    PowerConstant(double value) : value(value), intValue(0), isIntPower(false) {
+        // Casting a double outside int's range (or NaN) to int is undefined, and negating the most
+        // negative int overflows, so only exponents in [-INT_MAX, INT_MAX] may take the fast path.
+        const double maxInt = (double) (int) (~0u >> 1);
+        if (value >= -maxInt && value <= maxInt) {
+            intValue = (int) value;
+            isIntPower = (intValue == value);
+        }
+    }
+    // Name is "^" followed by the exponent, formatted through a stringstream.
+    std::string getName() const {
+        std::stringstream name;
+        name << "^" << value;
+        return name.str();
+    }
+    Id getId() const { return POWER_CONSTANT; }   // Id constant identifying this operation
+    int getNumArguments() const { return 1; }     // takes one argument
+    // Return a newly allocated PowerConstant with the same exponent.
+    Operation* clone() const { return new PowerConstant(value); }
+    // Raise args[0] to the stored exponent; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        if (!isIntPower)
+            return std::pow(args[0], value);
+        // Integer powers can be computed much more quickly by repeated multiplication.
+        int exponent = intValue;
+        double base = args[0];
+        if (exponent < 0) {
+            // Negation cannot overflow: the constructor never stores the most negative int.
+            exponent = -exponent;
+            base = 1.0/base;
+        }
+        double result = 1.0;
+        while (exponent != 0) {
+            if ((exponent&1) == 1)
+                result *= base;
+            base *= base;
+            exponent = exponent>>1;
+        }
+        return result;
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+    double getValue() const { return value; }     // the exponent
+    // True when op is not a PowerConstant or carries a different exponent.
+    bool operator!=(const Operation& op) const {
+        const PowerConstant* o = dynamic_cast<const PowerConstant*>(&op);
+        return (o == NULL || o->value != value);
+    }
+    // Reports this operation as an infix operator.
+    bool isInfixOperator() const { return true; }
+private:
+    double value;
+    int intValue;
+    bool isIntPower;
+};
+
+// Binary operation "min": evaluates to the smaller of its two arguments via std::min.
+class LEPTON_EXPORT Operation::Min : public Operation {
+public:
+    Min() {}
+    // Name of this operation.
+    std::string getName() const { return "min"; }
+    // Id constant identifying this operation.
+    Id getId() const { return MIN; }
+    // This operation takes two arguments.
+    int getNumArguments() const { return 2; }
+    // Return a newly allocated Min.
+    Operation* clone() const { return new Min(); }
+    // Compare args[0] with args[1]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        // Parenthesizing (std::min) keeps Microsoft's min/max macros from expanding here.
+        const double lhs = args[0];
+        const double rhs = args[1];
+        return (std::min)(lhs, rhs);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Binary operation "max": evaluates to the larger of its two arguments via std::max.
+class LEPTON_EXPORT Operation::Max : public Operation {
+public:
+    Max() {}
+    // Name of this operation.
+    std::string getName() const { return "max"; }
+    // Id constant identifying this operation.
+    Id getId() const { return MAX; }
+    // This operation takes two arguments.
+    int getNumArguments() const { return 2; }
+    // Return a newly allocated Max.
+    Operation* clone() const { return new Max(); }
+    // Compare args[0] with args[1]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        // Parenthesizing (std::max) keeps Microsoft's min/max macros from expanding here.
+        const double lhs = args[0];
+        const double rhs = args[1];
+        return (std::max)(lhs, rhs);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "abs": applies std::abs to its single argument, which is
+// passed as a double.
+class LEPTON_EXPORT Operation::Abs : public Operation {
+public:
+    Abs() {}
+    // Name of this operation.
+    std::string getName() const { return "abs"; }
+    // Id constant identifying this operation.
+    Id getId() const { return ABS; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Abs.
+    Operation* clone() const { return new Abs(); }
+    // Compute std::abs(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::abs(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "floor": applies std::floor to its single argument, i.e.
+// rounds toward negative infinity as defined by the C++ standard library.
+// The result is still returned as a double.
+class LEPTON_EXPORT Operation::Floor : public Operation {
+public:
+    Floor() {}
+    // Name of this operation.
+    std::string getName() const { return "floor"; }
+    // Id constant identifying this operation.
+    Id getId() const { return FLOOR; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Floor.
+    Operation* clone() const { return new Floor(); }
+    // Compute std::floor(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::floor(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Unary operation "ceil": applies std::ceil to its single argument, i.e.
+// rounds toward positive infinity as defined by the C++ standard library.
+class LEPTON_EXPORT Operation::Ceil : public Operation {
+public:
+    Ceil() {}
+    // Name of this operation.
+    std::string getName() const { return "ceil"; }
+    // Id constant identifying this operation.
+    Id getId() const { return CEIL; }
+    // This operation takes one argument.
+    int getNumArguments() const { return 1; }
+    // Return a newly allocated Ceil.
+    Operation* clone() const { return new Ceil(); }
+    // Compute std::ceil(args[0]); the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        const double x = args[0];
+        return std::ceil(x);
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+// Ternary operation "select": yields args[1] when args[0] is nonzero, otherwise args[2].
+class LEPTON_EXPORT Operation::Select : public Operation {
+public:
+    Select() {}
+    // Name of this operation.
+    std::string getName() const { return "select"; }
+    // Id constant identifying this operation.
+    Id getId() const { return SELECT; }
+    // This operation takes three arguments.
+    int getNumArguments() const { return 3; }
+    // Return a newly allocated Select.
+    Operation* clone() const { return new Select(); }
+    // Pick between args[1] and args[2] based on args[0]; the variables map is not consulted.
+    double evaluate(double* args, const std::map<std::string, double>& variables) const {
+        if (args[0] != 0.0)
+            return args[1];
+        return args[2];
+    }
+    // Differentiation rule for this operation; only declared in this class body.
+    ExpressionTreeNode differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const;
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_OPERATION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/ParsedExpression.h b/lib/colvars/lepton/include/lepton/ParsedExpression.h
new file mode 100644
index 0000000000000000000000000000000000000000..d88b3d5829167cd19b2b122f86f974719a620435
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/ParsedExpression.h
@@ -0,0 +1,130 @@
+#ifndef LEPTON_PARSED_EXPRESSION_H_
+#define LEPTON_PARSED_EXPRESSION_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "ExpressionTreeNode.h"
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+
+namespace Lepton {
+
+class CompiledExpression;
+class ExpressionProgram;
+
+/**
+ * This class represents the result of parsing an expression.  It provides methods for working with the
+ * expression in various ways, such as evaluating it, getting the tree representation of the expression, etc.
+ */
+
+class LEPTON_EXPORT ParsedExpression {
+public:
+    /**
+     * Create an uninitialized ParsedExpression.  This exists so that ParsedExpressions can be put in STL containers.
+     * Doing anything with it will produce an exception.
+     */
+    ParsedExpression();
+    /**
+     * Create a ParsedExpression.  Normally you will not call this directly.  Instead, use the Parser class
+     * to parse expressions.
+     */
+    ParsedExpression(const ExpressionTreeNode& rootNode);
+    /**
+     * Get the root node of the expression's abstract syntax tree.
+     */
+    const ExpressionTreeNode& getRootNode() const;
+    /**
+     * Evaluate the expression.  If the expression involves any variables, this method will throw an exception.
+     */
+    double evaluate() const;
+    /**
+     * Evaluate the expression.
+     *
+     * @param variables    a map specifying the values of all variables that appear in the expression.  If any
+     *                     variable appears in the expression but is not included in this map, an exception
+     *                     will be thrown.
+     */
+    double evaluate(const std::map<std::string, double>& variables) const;
+    /**
+     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
+     */
+    ParsedExpression optimize() const;
+    /**
+     * Create a new ParsedExpression which produces the same result as this one, but is faster to evaluate.
+     *
+     * @param variables    a map specifying values for a subset of variables that appear in the expression.
+     *                     All occurrences of these variables in the expression are replaced with the values
+     *                     specified.
+     */
+    ParsedExpression optimize(const std::map<std::string, double>& variables) const;
+    /**
+     * Create a new ParsedExpression which is the analytic derivative of this expression with respect to a
+     * particular variable.
+     *
+     * @param variable     the variable with respect to which the derivative should be taken
+     */
+    ParsedExpression differentiate(const std::string& variable) const;
+    /**
+     * Create an ExpressionProgram that represents the same calculation as this expression.
+     */
+    ExpressionProgram createProgram() const;
+    /**
+     * Create a CompiledExpression that represents the same calculation as this expression.
+     */
+    CompiledExpression createCompiledExpression() const;
+    /**
+     * Create a new ParsedExpression which is identical to this one, except that the names of some
+     * variables have been changed.
+     *
+     * @param replacements    a map whose keys are the names of variables, and whose values are the
+     *                        new names to replace them with
+     */
+    ParsedExpression renameVariables(const std::map<std::string, std::string>& replacements) const;
+private:  // static helpers operating on tree nodes; only declared here, definitions are not in this header
+    static double evaluate(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
+    static ExpressionTreeNode preevaluateVariables(const ExpressionTreeNode& node, const std::map<std::string, double>& variables);
+    static ExpressionTreeNode precalculateConstantSubexpressions(const ExpressionTreeNode& node);
+    static ExpressionTreeNode substituteSimplerExpression(const ExpressionTreeNode& node);
+    static ExpressionTreeNode differentiate(const ExpressionTreeNode& node, const std::string& variable);
+    static double getConstantValue(const ExpressionTreeNode& node);
+    static ExpressionTreeNode renameNodeVariables(const ExpressionTreeNode& node, const std::map<std::string, std::string>& replacements);
+    ExpressionTreeNode rootNode;  // NOTE(review): presumably the node returned by getRootNode() -- confirm in the .cpp
+};
+
+LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ExpressionTreeNode& node);
+
+LEPTON_EXPORT std::ostream& operator<<(std::ostream& out, const ParsedExpression& exp);
+
+} // namespace Lepton
+
+#endif /*LEPTON_PARSED_EXPRESSION_H_*/
diff --git a/lib/colvars/lepton/include/lepton/Parser.h b/lib/colvars/lepton/include/lepton/Parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..63d5988d5fa2af79718bbb2a1df6298b43b6260d
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/Parser.h
@@ -0,0 +1,77 @@
+#ifndef LEPTON_PARSER_H_
+#define LEPTON_PARSER_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "windowsIncludes.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace Lepton {
+
+class CustomFunction;
+class ExpressionTreeNode;
+class Operation;
+class ParsedExpression;
+class ParseToken;
+
+/**
+ * This class provides the main interface for parsing expressions.
+ */
+
+class LEPTON_EXPORT Parser {
+public:
+    /**
+     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
+     */
+    static ParsedExpression parse(const std::string& expression);
+    /**
+     * Parse a mathematical expression and return a representation of it as an abstract syntax tree.
+     *
+     * @param customFunctions   a map specifying user defined functions that may appear in the expression.
+     *                          The keys are function names, and the values are corresponding CustomFunction objects.
+     */
+    static ParsedExpression parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions);
+private:  // static parsing helpers; only declared here, definitions are not in this header
+    static std::string trim(const std::string& expression);
+    static std::vector<ParseToken> tokenize(const std::string& expression);
+    static ParseToken getNextToken(const std::string& expression, int start);
+    static ExpressionTreeNode parsePrecedence(const std::vector<ParseToken>& tokens, int& pos, const std::map<std::string, CustomFunction*>& customFunctions,
+            const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence);
+    static Operation* getOperatorOperation(const std::string& name);
+    static Operation* getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions);
+};
+
+} // namespace Lepton
+
+#endif /*LEPTON_PARSER_H_*/
diff --git a/lib/colvars/lepton/include/lepton/windowsIncludes.h b/lib/colvars/lepton/include/lepton/windowsIncludes.h
new file mode 100644
index 0000000000000000000000000000000000000000..798229850e78305231f6216a883361422180d9a7
--- /dev/null
+++ b/lib/colvars/lepton/include/lepton/windowsIncludes.h
@@ -0,0 +1,41 @@
+#ifndef LEPTON_WINDOW_INCLUDE_H_
+#define LEPTON_WINDOW_INCLUDE_H_
+
+/*
+ * Shared libraries are messy in Visual Studio. We have to distinguish three
+ * cases:
+ *   (1) this header is being used to build the Lepton shared library
+ *       (dllexport)
+ *   (2) this header is being used by a *client* of the Lepton shared
+ *       library (dllimport)
+ *   (3) we are building the Lepton static library, or the client is
+ *       being compiled with the expectation of linking with the
+ *       Lepton static library (nothing special needed)
+ * In the CMake script for building this library, we define one of the symbols
+ *     LEPTON_BUILDING_{SHARED|STATIC}_LIBRARY
+ * Client code normally has no special symbol defined, in which case we'll
+ * assume it wants to use the shared library. However, if the client defines
+ * the symbol LEPTON_USE_STATIC_LIBRARIES we'll suppress the dllimport so
+ * that the client code can be linked with static libraries. Note that
+ * the client symbol is not library dependent, while the library symbols
+ * affect only the Lepton library, meaning that other libraries can
+ * be clients of this one. However, we are assuming all-static or all-shared.
+ */
+
+#if defined(_MSC_VER)
+    // Silence warning 4996: we don't want to hear about how sprintf is "unsafe".
+    #pragma warning(disable:4996)
+    // Silence warning 4251: lack of dll export of private members.
+    #pragma warning(disable:4251)
+    #if defined(LEPTON_BUILDING_SHARED_LIBRARY)
+        #define LEPTON_EXPORT __declspec(dllexport)   // building the shared library itself
+    #elif !defined(LEPTON_BUILDING_STATIC_LIBRARY) && !defined(LEPTON_USE_STATIC_LIBRARIES)
+        #define LEPTON_EXPORT __declspec(dllimport)   // i.e., a client of a shared library
+    #else
+        #define LEPTON_EXPORT                         // static library build, or a client linking statically
+    #endif
+#else
+    #define LEPTON_EXPORT // compilers other than MSVC need no decoration
+#endif
+
+#endif // LEPTON_WINDOW_INCLUDE_H_
diff --git a/lib/colvars/lepton/src/CompiledExpression.cpp b/lib/colvars/lepton/src/CompiledExpression.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..302f294ee2380e58326833d526b77b1904755ee6
--- /dev/null
+++ b/lib/colvars/lepton/src/CompiledExpression.cpp
@@ -0,0 +1,400 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/CompiledExpression.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include <utility>
+
+using namespace Lepton;
+using namespace std;
+#ifdef LEPTON_USE_JIT
+    using namespace asmjit;
+#endif
+
+CompiledExpression::CompiledExpression() : jitCode(NULL) { // Empty expression: no steps are compiled and no JIT code is generated.
+}
+
+CompiledExpression::CompiledExpression(const ParsedExpression& expression) : jitCode(NULL) {
+    vector<pair<ExpressionTreeNode, int> > seenNodes; // Subexpressions that have already been given a workspace slot.
+    compileExpression(expression.optimize().getRootNode(), seenNodes); // Optimize first, just in case it wasn't already.
+    int widestCall = 1; // Size of the scratch argument buffer; starts at 1 so it is never empty (&argValues[0] is taken elsewhere).
+    for (size_t step = 0; step < operation.size(); ++step) {
+        const int numArgs = operation[step]->getNumArguments();
+        if (numArgs > widestCall) widestCall = numArgs;
+    }
+    argValues.resize(widestCall);
+#ifdef LEPTON_USE_JIT
+    generateJitCode();
+#endif
+}
+
+CompiledExpression::~CompiledExpression() {
+    // Every stored operation is a clone() owned by this object; delete on a null pointer is a no-op, so no NULL test is needed.
+    for (size_t step = 0; step < operation.size(); ++step)
+        delete operation[step];
+}
+
+CompiledExpression::CompiledExpression(const CompiledExpression& expression) : jitCode(NULL) {
+    *this = expression; // Delegate to operator=, which clones each operation of the source.
+}
+
+CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expression) {
+    if (this == &expression) return *this; // Self-assignment: nothing to copy, and the cleanup below must not free the source.
+    arguments = expression.arguments;
+    target = expression.target;
+    variableIndices = expression.variableIndices;
+    variableNames = expression.variableNames;
+    workspace.resize(expression.workspace.size()); argValues.resize(expression.argValues.size()); // Scratch storage: sizes only, values are not copied.
+    for (int i = 0; i < (int) operation.size(); i++) delete operation[i]; // Free the operations we own; overwriting them used to leak.
+    operation.resize(expression.operation.size());
+    for (int i = 0; i < (int) operation.size(); i++) operation[i] = expression.operation[i]->clone(); // Deep copy of every step.
+    setVariableLocations(variablePointers); // Keeps this object's own variable locations and rebuilds the copy list / JIT code.
+    return *this;
+}
+
+void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
+    if (findTempIndex(node, temps) != -1)
+        return; // We have already processed a node identical to this one.
+    // Otherwise flatten this subtree post-order: children first, then this node; each gets one workspace slot.
+    // Process the child nodes.
+    // args[i] receives the slot of the i-th child, looked up in temps once that child has been compiled.
+    vector<int> args;
+    for (int i = 0; i < node.getChildren().size(); i++) {
+        compileExpression(node.getChildren()[i], temps);
+        args.push_back(findTempIndex(node.getChildren()[i], temps));
+    }
+    
+    // Process this node.
+    
+    if (node.getOperation().getId() == Operation::VARIABLE) {
+        variableIndices[node.getOperation().getName()] = (int) workspace.size(); // Variables get a slot but no evaluation step.
+        variableNames.insert(node.getOperation().getName());
+    }
+    else {
+        int stepIndex = (int) arguments.size();
+        arguments.push_back(vector<int>());
+        target.push_back((int) workspace.size()); // This step writes the slot appended at the end of this function.
+        operation.push_back(node.getOperation().clone()); // Owned copy; released by the destructor.
+        if (args.size() == 0)
+            arguments[stepIndex].push_back(0); // The value won't actually be used.  We just need something there.
+        else {
+            // If the arguments are sequential, we can just pass a pointer to the first one.
+            // (A one-element argument list is how evaluate() recognizes that case.)
+            bool sequential = true;
+            for (int i = 1; i < args.size(); i++)
+                if (args[i] != args[i-1]+1)
+                    sequential = false;
+            if (sequential)
+                arguments[stepIndex].push_back(args[0]);
+            else
+                arguments[stepIndex] = args;
+        }
+    }
+    temps.push_back(make_pair(node, (int) workspace.size())); // temps and workspace grow together here, so (when both start empty) a temps index is also a slot index.
+    workspace.push_back(0.0);
+}
+
+int CompiledExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
+    int position = 0; // Linear scan for the first entry equal to node; yields its position in temps, or -1 if absent.
+    while (position < (int) temps.size() && !(temps[position].first == node))
+        ++position;
+    return position < (int) temps.size() ? position : -1;
+}
+
+const set<string>& CompiledExpression::getVariables() const {
+    return variableNames; // Names collected by compileExpression() from the VARIABLE nodes it encountered.
+}
+
+double& CompiledExpression::getVariableReference(const string& name) {
+    const map<string, double*>::iterator external = variablePointers.find(name); // Caller-provided storage takes precedence.
+    if (external != variablePointers.end())
+        return *(external->second);
+    const map<string, int>::iterator slot = variableIndices.find(name); // Otherwise fall back to the variable's workspace slot.
+    if (slot != variableIndices.end())
+        return workspace[slot->second];
+    throw Exception("getVariableReference: Unknown variable '"+name+"'");
+}
+
+void CompiledExpression::setVariableLocations(map<string, double*>& variableLocations) {
+    variablePointers = variableLocations; // Remember where the caller keeps each variable's value.
+#ifdef LEPTON_USE_JIT
+    // The generated code embeds the variable addresses, so it has to be regenerated (once there is something to compile).
+    if (!workspace.empty())
+        generateJitCode();
+#else
+    // Interpreter build: precompute the (workspace slot, external location) pairs that evaluate()
+    // copies before running, one for every compiled variable that has a registered location.
+    variablesToCopy.clear();
+    for (map<string, int>::const_iterator known = variableIndices.begin(); known != variableIndices.end(); ++known) {
+        const map<string, double*>::iterator external = variablePointers.find(known->first);
+        if (external == variablePointers.end())
+            continue;
+        variablesToCopy.push_back(make_pair(&workspace[known->second], external->second));
+    }
+#endif
+}
+
+double CompiledExpression::evaluate() const {
+#ifdef LEPTON_USE_JIT
+    return ((double (*)()) jitCode)(); // jitCode is the entry point produced by generateJitCode().
+#else
+    // Refresh every externally stored variable into its workspace slot.
+    for (size_t v = 0; v < variablesToCopy.size(); ++v)
+        *variablesToCopy[v].first = *variablesToCopy[v].second;
+    // Run the steps in order; each one writes its result to workspace[target[step]].
+    for (size_t step = 0; step < operation.size(); ++step) {
+        const vector<int>& slots = arguments[step];
+        if (slots.size() != 1) {
+            // Scattered arguments: gather them into the contiguous scratch buffer first.
+            for (size_t a = 0; a < slots.size(); ++a)
+                argValues[a] = workspace[slots[a]];
+            workspace[target[step]] = operation[step]->evaluate(&argValues[0], dummyVariables);
+        }
+        else // One entry: the arguments already sit back to back starting at that slot.
+            workspace[target[step]] = operation[step]->evaluate(&workspace[slots[0]], dummyVariables);
+    }
+    return workspace.back();
+#endif
+}
+
+#ifdef LEPTON_USE_JIT
+static double evaluateOperation(Operation* op, double* args) { // Trampoline called from JIT code for operations without inline code.
+    static const map<string, double> noVariables; // A real (empty) map: forming a reference from a NULL pointer, as before, is undefined behaviour.
+    return op->evaluate(args, noVariables);
+}
+
+void CompiledExpression::generateJitCode() {
+    X86Compiler c(&runtime);
+    c.addFunc(kFuncConvHost, FuncBuilder0<double>()); // The generated function takes no arguments and returns a double.
+    vector<X86XmmVar> workspaceVar(workspace.size());
+    for (int i = 0; i < (int) workspaceVar.size(); i++)
+        workspaceVar[i] = c.newXmmVar(kX86VarTypeXmmSd); // One compiler-managed XMM variable per workspace slot.
+    X86GpVar argsPointer(c);
+    c.mov(argsPointer, imm_ptr(&argValues[0])); // Address of the scratch buffer used by the fallback calls in the default case below.
+    
+    // Load the arguments into variables.
+    // Each variable's storage address is embedded as an immediate, so the code is tied to the current locations.
+    for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
+        map<string, int>::iterator index = variableIndices.find(*iter);
+        X86GpVar variablePointer(c);
+        c.mov(variablePointer, imm_ptr(&getVariableReference(index->first)));
+        c.movsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
+    }
+
+    // Make a list of all constants that will be needed for evaluation.
+    // operationConstantIndex[step] is the position in `constants` used by that step, or -1 if it needs none.
+    vector<int> operationConstantIndex(operation.size(), -1);
+    for (int step = 0; step < (int) operation.size(); step++) {
+        // Find the constant value (if any) used by this operation.
+        
+        Operation& op = *operation[step];
+        double value;
+        if (op.getId() == Operation::CONSTANT)
+            value = dynamic_cast<Operation::Constant&>(op).getValue();
+        else if (op.getId() == Operation::ADD_CONSTANT)
+            value = dynamic_cast<Operation::AddConstant&>(op).getValue();
+        else if (op.getId() == Operation::MULTIPLY_CONSTANT)
+            value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
+        else if (op.getId() == Operation::RECIPROCAL)
+            value = 1.0; // Numerator of 1/x.
+        else if (op.getId() == Operation::STEP)
+            value = 1.0; // Value selected by the comparison mask.
+        else if (op.getId() == Operation::DELTA)
+            value = 1.0; // Value selected by the comparison mask.
+        else
+            continue; // This operation needs no constant.
+        
+        // See if we already have a variable for this constant.
+        // (`constants` is not cleared in this function, so entries from an earlier generation are reused too.)
+        for (int i = 0; i < (int) constants.size(); i++)
+            if (value == constants[i]) {
+                operationConstantIndex[step] = i;
+                break;
+            }
+        if (operationConstantIndex[step] == -1) {
+            operationConstantIndex[step] = constants.size();
+            constants.push_back(value);
+        }
+    }
+    
+    // Load constants into variables.
+    // The address of constants[0] is taken only now, after every push_back above has happened.
+    vector<X86XmmVar> constantVar(constants.size());
+    if (constants.size() > 0) {
+        X86GpVar constantsPointer(c);
+        c.mov(constantsPointer, imm_ptr(&constants[0]));
+        for (int i = 0; i < (int) constants.size(); i++) {
+            constantVar[i] = c.newXmmVar(kX86VarTypeXmmSd);
+            c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0)); // 8*i: byte offset of the i-th double.
+        }
+    }
+    
+    // Evaluate the operations.
+    // Each step emits code that leaves its result in workspaceVar[target[step]].
+    for (int step = 0; step < (int) operation.size(); step++) {
+        Operation& op = *operation[step];
+        vector<int> args = arguments[step];
+        if (args.size() == 1) {
+            // One or more sequential arguments.  Fill out the list.
+            // (compileExpression() stores only the first slot when the arguments are consecutive.)
+            for (int i = 1; i < op.getNumArguments(); i++)
+                args.push_back(args[0]+i);
+        }
+        
+        // Generate instructions to execute this operation.
+        // Simple arithmetic is emitted inline; math-library functions go through generateSingleArgCall().
+        switch (op.getId()) {
+            case Operation::CONSTANT:
+                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ADD:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.addsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::SUBTRACT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.subsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::MULTIPLY:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::DIVIDE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.divsd(workspaceVar[target[step]], workspaceVar[args[1]]);
+                break;
+            case Operation::NEGATE:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]); // Zero the destination ...
+                c.subsd(workspaceVar[target[step]], workspaceVar[args[0]]); // ... then compute 0 - x.
+                break;
+            case Operation::SQRT:
+                c.sqrtsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::EXP:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
+                break;
+            case Operation::LOG:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
+                break;
+            case Operation::SIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
+                break;
+            case Operation::COS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
+                break;
+            case Operation::TAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
+                break;
+            case Operation::ASIN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
+                break;
+            case Operation::ACOS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
+                break;
+            case Operation::ATAN:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
+                break;
+            case Operation::SINH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
+                break;
+            case Operation::COSH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
+                break;
+            case Operation::TANH:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
+                break;
+            case Operation::STEP:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
+                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask the constant 1.0 with the result of (0 <= x).
+                break;
+            case Operation::DELTA:
+                c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
+                c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
+                c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]); // Mask the constant 1.0 with the result of (0 == x).
+                break;
+            case Operation::SQUARE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::CUBE:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::RECIPROCAL:
+                c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                c.divsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                break;
+            case Operation::ADD_CONSTANT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.addsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::MULTIPLY_CONSTANT:
+                c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
+                c.mulsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
+                break;
+            case Operation::ABS:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], fabs);
+                break;
+            case Operation::FLOOR:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], floor);
+                break;
+            case Operation::CEIL:
+                generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], ceil);
+                break;
+            default:
+                // Just invoke evaluateOperation().
+                // Spill the arguments to argValues, then call evaluateOperation(&op, &argValues[0]).
+                for (int i = 0; i < (int) args.size(); i++)
+                    c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]);
+                X86GpVar fn(c, kVarTypeIntPtr);
+                c.mov(fn, imm_ptr((void*) evaluateOperation));
+                X86CallNode* call = c.call(fn, kFuncConvHost, FuncBuilder2<double, Operation*, double*>());
+                call->setArg(0, imm_ptr(&op));
+                call->setArg(1, imm_ptr(&argValues[0]));
+                call->setRet(0, workspaceVar[target[step]]);
+        }
+    }
+    c.ret(workspaceVar[workspace.size()-1]); // The expression's value is the last workspace slot.
+    c.endFunc();
+    jitCode = c.make(); // NOTE(review): a previously generated jitCode is overwritten here without being released in this function.
+}
+
+void CompiledExpression::generateSingleArgCall(X86Compiler& c, X86XmmVar& dest, X86XmmVar& arg, double (*function)(double)) {
+    X86GpVar callee(c, kVarTypeIntPtr); // Receives the address of the C function to invoke.
+    c.mov(callee, imm_ptr((void*) function));
+    X86CallNode* const invocation = c.call(callee, kFuncConvHost, FuncBuilder1<double, double>()); // Signature: double f(double).
+    invocation->setArg(0, arg);
+    invocation->setRet(0, dest);
+}
+#endif
diff --git a/lib/colvars/lepton/src/ExpressionProgram.cpp b/lib/colvars/lepton/src/ExpressionProgram.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..65d3f0c79a6fac0a33a6fd6be49c8ab3f7678fee
--- /dev/null
+++ b/lib/colvars/lepton/src/ExpressionProgram.cpp
@@ -0,0 +1,105 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2013 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ExpressionProgram.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+
+using namespace Lepton;
+using namespace std;
+
+ExpressionProgram::ExpressionProgram() : maxArgs(0), stackSize(0) { // Empty program: no operations, zero stack depth.
+}
+
+ExpressionProgram::ExpressionProgram(const ParsedExpression& expression) : maxArgs(0), stackSize(0) {
+    buildProgram(expression.getRootNode());
+    // Dry-run the program to record the widest operation (maxArgs) and the
+    // greatest depth the value stack reaches (stackSize).
+    int depth = 0;
+    for (size_t op = 0; op < operations.size(); ++op) {
+        const int consumed = operations[op]->getNumArguments();
+        if (maxArgs < consumed) maxArgs = consumed;
+        depth -= consumed-1; // Each operation pops its arguments and pushes one result.
+        if (stackSize < depth) stackSize = depth;
+    }
+}
+
+ExpressionProgram::~ExpressionProgram() {
+    for (size_t op = 0; op < operations.size(); ++op) // The program owns the clones made by buildProgram() / operator=.
+        delete operations[op];
+}
+
+ExpressionProgram::ExpressionProgram(const ExpressionProgram& program) { // maxArgs and stackSize have no initializer here; operator= assigns both.
+    *this = program; // Delegate to operator=, which clones each operation of the source.
+}
+
+ExpressionProgram& ExpressionProgram::operator=(const ExpressionProgram& program) {
+    if (this == &program) return *this; // Self-assignment: nothing to copy, and the cleanup below must not free the source.
+    maxArgs = program.maxArgs; stackSize = program.stackSize;
+    for (int i = 0; i < (int) operations.size(); i++) delete operations[i]; // Free the operations we own; overwriting them used to leak.
+    operations.resize(program.operations.size());
+    for (int i = 0; i < (int) operations.size(); i++) operations[i] = program.operations[i]->clone(); // Deep copy of every operation.
+    return *this;
+}
+
+void ExpressionProgram::buildProgram(const ExpressionTreeNode& node) {
+    for (size_t remaining = node.getChildren().size(); remaining > 0; --remaining) // Children last-to-first, so the first argument is pushed last.
+        buildProgram(node.getChildren()[remaining-1]);
+    operations.push_back(node.getOperation().clone()); // The node's own operation follows all of its operands.
+}
+
+int ExpressionProgram::getNumOperations() const {
+    return (int) operations.size(); // Number of operations in the flattened program.
+}
+
+const Operation& ExpressionProgram::getOperation(int index) const {
+    return *operations[index]; // No bounds check: index must lie in [0, getNumOperations()).
+}
+
+int ExpressionProgram::getStackSize() const {
+    return stackSize; // Greatest stack depth, as computed by the constructor from a ParsedExpression.
+}
+
+double ExpressionProgram::evaluate() const {
+    return evaluate(map<string, double>()); // Evaluate with an empty variable table.
+}
+
+double ExpressionProgram::evaluate(const std::map<std::string, double>& variables) const {
+    vector<double> stack(stackSize+1); // Grows downward from the top; the extra slot keeps &stack[top] valid before the first push.
+    int top = stackSize;
+    for (size_t op = 0; op < operations.size(); ++op) {
+        const int consumed = operations[op]->getNumArguments();
+        const double value = operations[op]->evaluate(&stack[top], variables); // Arguments start at the current top of the stack.
+        top += consumed-1; // Pop the arguments, make room for one result.
+        stack[top] = value;
+    }
+    return stack[stackSize-1];
+}
diff --git a/lib/colvars/lepton/src/ExpressionTreeNode.cpp b/lib/colvars/lepton/src/ExpressionTreeNode.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8519878262ab67e9e1d82712697b1d49b821168e
--- /dev/null
+++ b/lib/colvars/lepton/src/ExpressionTreeNode.cpp
@@ -0,0 +1,107 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Exception.h"
+#include "lepton/Operation.h"
+
+using namespace Lepton;
+using namespace std;
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const vector<ExpressionTreeNode>& children) : operation(operation), children(children) {
+    if (operation->getNumArguments() != children.size()) // The operation's arity must match the number of children supplied.
+        throw Exception("wrong number of arguments to function: "+operation->getName()); // NOTE(review): the destructor does not run when a constructor throws, so `operation` is not deleted on this path.
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child1, const ExpressionTreeNode& child2) : operation(operation) {
+    children.push_back(child1); // Convenience form for two operands: the children are copied in argument order.
+    children.push_back(child2);
+    if (operation->getNumArguments() != children.size()) // The operation must take exactly two arguments.
+        throw Exception("wrong number of arguments to function: "+operation->getName()); // NOTE(review): the destructor does not run when a constructor throws, so `operation` is not deleted on this path.
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation, const ExpressionTreeNode& child) : operation(operation) {
+    children.push_back(child); // Convenience form for a single operand.
+    if (operation->getNumArguments() != children.size()) // The operation must take exactly one argument.
+        throw Exception("wrong number of arguments to function: "+operation->getName()); // NOTE(review): the destructor does not run when a constructor throws, so `operation` is not deleted on this path.
+}
+
+ExpressionTreeNode::ExpressionTreeNode(Operation* operation) : operation(operation) {
+    if (operation->getNumArguments() != children.size()) // Leaf node: children is empty, so the operation must take no arguments.
+        throw Exception("wrong number of arguments to function: "+operation->getName()); // NOTE(review): the destructor does not run when a constructor throws, so `operation` is not deleted on this path.
+}
+
+ExpressionTreeNode::ExpressionTreeNode(const ExpressionTreeNode& node) : operation(node.operation == NULL ? NULL : node.operation->clone()), children(node.getChildren()) {
+} // Deep copy: the operation is cloned (a NULL operation stays NULL) and the child vector is copied.
+
+ExpressionTreeNode::ExpressionTreeNode() : operation(NULL) { // Placeholder node with no operation and no children.
+}
+
+ExpressionTreeNode::~ExpressionTreeNode() {
+    // The node owns its Operation; delete is a no-op for the NULL held by default-constructed nodes.
+    delete operation;
+}
+
+bool ExpressionTreeNode::operator!=(const ExpressionTreeNode& node) const {
+    if (node.getOperation() != getOperation()) // Different operations can never match.
+        return true;
+    const vector<ExpressionTreeNode>& mine = children;
+    const vector<ExpressionTreeNode>& theirs = node.children;
+    if (getOperation().isSymmetric() && mine.size() == 2) {
+        // Symmetric binary operation: the operands may match in either order.
+        const bool sameOrder = (mine[0] == theirs[0] && mine[1] == theirs[1]);
+        return !(sameOrder || (mine[0] == theirs[1] && mine[1] == theirs[0]));
+    }
+    for (size_t i = 0; i < mine.size(); ++i) // Otherwise the children must match position by position.
+        if (mine[i] != theirs[i])
+            return true;
+    return false;
+}
+
+bool ExpressionTreeNode::operator==(const ExpressionTreeNode& node) const {
+    return !(*this != node); // Defined as the negation of operator!=, which holds the actual comparison logic.
+}
+
+ExpressionTreeNode& ExpressionTreeNode::operator=(const ExpressionTreeNode& node) {
+    Operation* replacement = (node.operation == NULL ? NULL : node.operation->clone()); // Clone before deleting: safe for self-assignment and for a source without an operation.
+    delete operation; // Deleting NULL is a no-op, so no test is needed.
+    operation = replacement;
+    children = node.getChildren();
+    return *this;
+}
+
+const Operation& ExpressionTreeNode::getOperation() const {
+    return *operation; // No NULL check: must not be called on a default-constructed node, whose operation is NULL.
+}
+
+const vector<ExpressionTreeNode>& ExpressionTreeNode::getChildren() const {
+    return children; // The operands of this node, in argument order (empty for leaf nodes).
+}
diff --git a/lib/colvars/lepton/src/MSVC_erfc.h b/lib/colvars/lepton/src/MSVC_erfc.h
new file mode 100644
index 0000000000000000000000000000000000000000..eadb20fdf89322f3836c058be530e10d53a629d6
--- /dev/null
+++ b/lib/colvars/lepton/src/MSVC_erfc.h
@@ -0,0 +1,87 @@
+#ifndef LEPTON_MSVC_ERFC_H_
+#define LEPTON_MSVC_ERFC_H_
+
+/*
+ * Up to version 11 (VC++ 2012), Microsoft does not support the
+ * standard C99 erf() and erfc() functions so we have to fake them here. 
+ * These were added in version 12 (VC++ 2013), which sets _MSC_VER=1800
+ * (VC11 has _MSC_VER=1700).
+ */
+
+#if defined(_MSC_VER) 
+#define M_PI 3.14159265358979323846264338327950288
+
+#if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12 
+/***************************
+*   erf.cpp
+*   author:  Steve Strand
+*   written: 29-Jan-04
+***************************/
+
+#include <cmath>
+
+static const double rel_error= 1E-12;        //calculate 12 significant figures
+//you can adjust rel_error to trade off between accuracy and speed
+//but don't ask for > 15 figures (assuming usual 52 bit mantissa in a double)
+
+static double erfc(double x);
+
+static double erf(double x)
+//erf(x) = 2/sqrt(pi)*integral(exp(-t^2),t,0,x)
+//       = 2/sqrt(pi)*[x - x^3/3 + x^5/(5*2!) - x^7/(7*3!) + ...]   (series, used when fabs(x) <= 2.2)
+//       = 1-erfc(x)                                               (used when fabs(x) > 2.2)
+{
+    static const double two_sqrtpi=  1.128379167095512574;        // 2/sqrt(pi)
+    if (fabs(x) > 2.2) {
+        return 1.0 - erfc(x);        //use continued fraction when fabs(x) > 2.2
+    }
+    double sum= x, term= x, xsqr= x*x;   //sum starts at the first series term; term holds x^(2j+1)/j! after each update
+    int j= 1;
+    do {                                 //two series terms per pass: one subtracted, one added
+        term*= xsqr/j;
+        sum-= term/(2*j+1);
+        ++j;
+        term*= xsqr/j;
+        sum+= term/(2*j+1);
+        ++j;
+    } while (fabs(term/sum) > rel_error);   //the ratio must be inside fabs(): sum is negative for x < 0, and fabs(term)/sum stopped the loop after one pass there (for x == 0 the ratio is NaN, which also ends the loop)
+    return two_sqrtpi*sum;
+}
+
+
+static double erfc(double x)
+//erfc(x) = 2/sqrt(pi)*integral(exp(-t^2),t,x,inf)
+//        = exp(-x^2)/sqrt(pi) * [1/x+ (1/2)/x+ (2/2)/x+ (3/2)/x+ (4/2)/x+ ...]
+//        = 1-erf(x)
+//expression inside [] is a continued fraction so '+' means add to denominator only
+{
+    static const double one_sqrtpi=  0.564189583547756287;        // 1/sqrt(pi)
+    if (fabs(x) < 2.2) {
+        return 1.0 - erf(x);        //use series when fabs(x) < 2.2
+    }
+    // From here on fabs(x) >= 2.2 (or x is NaN), so x is never 0 below.
+    if (x < 0) {               //continued fraction only valid for x>0
+        return 2.0 - erfc(-x);      //reflection: erfc(-x) = 2 - erfc(x)
+    }
+    double a=1, b=x;                //last two convergent numerators
+    double c=x, d=x*x+0.5;          //last two convergent denominators
+    double q1, q2= b/d;             //last two convergents (a/c and b/d)
+    double n= 1.0, t;               //n is the current partial numerator (1, 3/2, 2, ...); t is a temporary
+    do {
+        t= a*n+b*x;                 //next numerator from the previous two
+        a= b;
+        b= t;
+        t= c*n+d*x;                 //next denominator from the previous two
+        c= d;
+        d= t;
+        n+= 0.5;
+        q1= q2;
+        q2= b/d;
+      } while (fabs(q1-q2)/q2 > rel_error);   //stop once successive convergents agree to rel_error
+    return one_sqrtpi*exp(-x*x)*q2;   // NOTE(review): for x == +infinity q2 is inf/inf = NaN, so this returns NaN rather than 0.
+}
+
+#endif // _MSC_VER <= 1700
+#endif // _MSC_VER
+
+#endif // LEPTON_MSVC_ERFC_H_
diff --git a/lib/colvars/lepton/src/Operation.cpp b/lib/colvars/lepton/src/Operation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..693dea2edec60db68740b715a8d589d32131bead
--- /dev/null
+++ b/lib/colvars/lepton/src/Operation.cpp
@@ -0,0 +1,335 @@
+
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/Operation.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "MSVC_erfc.h"
+
+using namespace Lepton;
+using namespace std;
+
+double Operation::Erf::evaluate(double* args, const map<string, double>& variables) const {
+    return erf(args[0]);  // applies erf to the single operand args[0]; 'variables' is not consulted
+}
+
+double Operation::Erfc::evaluate(double* args, const map<string, double>& variables) const {
+    return erfc(args[0]);  // applies erfc to the single operand args[0]; 'variables' is not consulted
+}
+
+ExpressionTreeNode Operation::Constant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // the derivative of a constant is the constant 0
+}
+
+ExpressionTreeNode Operation::Variable::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // dv/dv = 1 when this node is the variable being differentiated against; any other variable gives 0.
+    const double slope = (variable == name) ? 1.0 : 0.0;
+    return ExpressionTreeNode(new Operation::Constant(slope));
+}
+
+ExpressionTreeNode Operation::Custom::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // Sums Custom(*this, i)(children) * childDerivs[i] over all arguments i; Custom(*this, i) presumably denotes the partial derivative w.r.t. argument i (confirm in Operation.h).
+    if (function->getNumArguments() == 0)
+        return ExpressionTreeNode(new Operation::Constant(0.0));
+    ExpressionTreeNode total(new Operation::Multiply(), ExpressionTreeNode(new Operation::Custom(*this, 0), children), childDerivs[0]);
+    for (int arg = 1; arg < getNumArguments(); ++arg) {
+        const ExpressionTreeNode partial(new Operation::Custom(*this, arg), children);
+        total = ExpressionTreeNode(new Operation::Add(), total, ExpressionTreeNode(new Operation::Multiply(), partial, childDerivs[arg]));
+    }
+    return total;
+}
+
+ExpressionTreeNode Operation::Add::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Add(), childDerivs[0], childDerivs[1]);  // sum rule: (u + v)' = u' + v'
+}
+
+ExpressionTreeNode Operation::Subtract::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Subtract(), childDerivs[0], childDerivs[1]);  // difference rule: (u - v)' = u' - v'
+}
+
+ExpressionTreeNode Operation::Multiply::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    const ExpressionTreeNode uDv(new Operation::Multiply(), children[0], childDerivs[1]);  // u * v'
+    const ExpressionTreeNode vDu(new Operation::Multiply(), children[1], childDerivs[0]);  // v * u'
+    return ExpressionTreeNode(new Operation::Add(), uDv, vDu);  // product rule: (u*v)' = u*v' + v*u'
+}
+
+ExpressionTreeNode Operation::Divide::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // Quotient rule: (u/v)' = (v*u' - u*v') / v^2.
+    const ExpressionTreeNode vDu(new Operation::Multiply(), children[1], childDerivs[0]);
+    const ExpressionTreeNode uDv(new Operation::Multiply(), children[0], childDerivs[1]);
+    const ExpressionTreeNode numerator(new Operation::Subtract(), vDu, uDv);
+    return ExpressionTreeNode(new Operation::Divide(), numerator, ExpressionTreeNode(new Operation::Square(), children[1]));
+}
+
+ExpressionTreeNode Operation::Power::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // General power rule for u^v, where both the base u and the exponent v may depend on the variable:
+    //   (u^v)' = v * u^(v-1) * u'  +  log(u) * u^v * v'
+    const ExpressionTreeNode& base = children[0];
+    const ExpressionTreeNode& exponent = children[1];
+    // Contribution of the base: v * u^(v-1) * u'.
+    const ExpressionTreeNode exponentMinusOne(new Operation::AddConstant(-1.0), exponent);
+    const ExpressionTreeNode reducedPower(new Operation::Power(), base, exponentMinusOne);
+    const ExpressionTreeNode baseFactor(new Operation::Multiply(), exponent, reducedPower);
+    const ExpressionTreeNode baseTerm(new Operation::Multiply(), baseFactor, childDerivs[0]);
+    // Contribution of the exponent: log(u) * u^v * v'.
+    const ExpressionTreeNode exponentFactor(new Operation::Multiply(), ExpressionTreeNode(new Operation::Log(), base), ExpressionTreeNode(new Operation::Power(), base, exponent));
+    return ExpressionTreeNode(new Operation::Add(), baseTerm, ExpressionTreeNode(new Operation::Multiply(), exponentFactor, childDerivs[1]));
+}
+
+ExpressionTreeNode Operation::Negate::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Negate(), childDerivs[0]);  // (-u)' = -(u')
+}
+
+ExpressionTreeNode Operation::Sqrt::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv sqrt(u) = 0.5 * (1/sqrt(u)) * u'.
+    const ExpressionTreeNode root(new Operation::Sqrt(), children[0]);
+    const ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    const ExpressionTreeNode halfInverseRoot(new Operation::MultiplyConstant(0.5), inverseRoot);
+    return ExpressionTreeNode(new Operation::Multiply(), halfInverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Exp::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv exp(u) = exp(u) * u'.
+    const ExpressionTreeNode expU(new Operation::Exp(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), expU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Log::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv log(u) = (1/u) * u'.
+    const ExpressionTreeNode inverseU(new Operation::Reciprocal(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), inverseU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv sin(u) = cos(u) * u'.
+    const ExpressionTreeNode cosU(new Operation::Cos(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(), cosU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv cos(u) = -sin(u) * u'.
+    const ExpressionTreeNode sinU(new Operation::Sin(), children[0]);
+    const ExpressionTreeNode minusSinU(new Operation::Negate(), sinU);
+    return ExpressionTreeNode(new Operation::Multiply(), minusSinU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sec::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv sec(u) = sec(u) * tan(u) * u'.
+    const ExpressionTreeNode secU(new Operation::Sec(), children[0]);
+    const ExpressionTreeNode tanU(new Operation::Tan(), children[0]);
+    const ExpressionTreeNode secTanU(new Operation::Multiply(), secU, tanU);
+    return ExpressionTreeNode(new Operation::Multiply(), secTanU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Csc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv csc(u) = -(csc(u) * cot(u)) * u'.
+    const ExpressionTreeNode cscU(new Operation::Csc(), children[0]);
+    const ExpressionTreeNode cotU(new Operation::Cot(), children[0]);
+    const ExpressionTreeNode cscCotU(new Operation::Multiply(), cscU, cotU);
+    const ExpressionTreeNode minusCscCotU(new Operation::Negate(), cscCotU);
+    return ExpressionTreeNode(new Operation::Multiply(), minusCscCotU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Tan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv tan(u) = sec(u)^2 * u'.
+    const ExpressionTreeNode secU(new Operation::Sec(), children[0]);
+    const ExpressionTreeNode secSquared(new Operation::Square(), secU);
+    return ExpressionTreeNode(new Operation::Multiply(), secSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cot::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv cot(u) = -(csc(u)^2) * u'.
+    const ExpressionTreeNode cscU(new Operation::Csc(), children[0]);
+    const ExpressionTreeNode cscSquared(new Operation::Square(), cscU);
+    const ExpressionTreeNode minusCscSquared(new Operation::Negate(), cscSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), minusCscSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Asin::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv asin(u) = (1 / sqrt(1 - u^2)) * u'.
+    const ExpressionTreeNode one(new Operation::Constant(1.0));
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode radicand(new Operation::Subtract(), one, uSquared);
+    const ExpressionTreeNode root(new Operation::Sqrt(), radicand);
+    const ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    return ExpressionTreeNode(new Operation::Multiply(), inverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Acos::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv acos(u) = -(1 / sqrt(1 - u^2)) * u'.
+    const ExpressionTreeNode one(new Operation::Constant(1.0));
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode radicand(new Operation::Subtract(), one, uSquared);
+    const ExpressionTreeNode root(new Operation::Sqrt(), radicand);
+    const ExpressionTreeNode inverseRoot(new Operation::Reciprocal(), root);
+    const ExpressionTreeNode minusInverseRoot(new Operation::Negate(), inverseRoot);
+    return ExpressionTreeNode(new Operation::Multiply(), minusInverseRoot, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Atan::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv atan(u) = (1 / (1 + u^2)) * u'.
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode onePlusUSquared(new Operation::AddConstant(1.0), uSquared);
+    const ExpressionTreeNode inverse(new Operation::Reciprocal(), onePlusUSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), inverse, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Sinh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv sinh(u) = cosh(u) * u'.
+    const ExpressionTreeNode coshU(new Operation::Cosh(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(),
+                              coshU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cosh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv cosh(u) = sinh(u) * u' (no sign change, unlike cos).
+    const ExpressionTreeNode sinhU(new Operation::Sinh(), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(),
+                              sinhU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Tanh::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv tanh(u) = (1 - tanh(u)^2) * u'.
+    const ExpressionTreeNode one(new Operation::Constant(1.0));
+    const ExpressionTreeNode tanhU(new Operation::Tanh(), children[0]);
+    const ExpressionTreeNode tanhSquared(new Operation::Square(), tanhU);
+    const ExpressionTreeNode oneMinusTanhSquared(new Operation::Subtract(), one, tanhSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), oneMinusTanhSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Erf::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv erf(u) = (2/sqrt(pi)) * exp(-u^2) * u'.
+    const ExpressionTreeNode scale(new Operation::Constant(2.0/sqrt(M_PI)));
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode minusUSquared(new Operation::Negate(), uSquared);
+    const ExpressionTreeNode gaussian(new Operation::Exp(), minusUSquared);
+    const ExpressionTreeNode scaledGaussian(new Operation::Multiply(), scale, gaussian);
+    return ExpressionTreeNode(new Operation::Multiply(), scaledGaussian, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Erfc::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv erfc(u) = (-2/sqrt(pi)) * exp(-u^2) * u'.
+    const ExpressionTreeNode scale(new Operation::Constant(-2.0/sqrt(M_PI)));
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode minusUSquared(new Operation::Negate(), uSquared);
+    const ExpressionTreeNode gaussian(new Operation::Exp(), minusUSquared);
+    const ExpressionTreeNode scaledGaussian(new Operation::Multiply(), scale, gaussian);
+    return ExpressionTreeNode(new Operation::Multiply(), scaledGaussian, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Step::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // always the constant 0; no term is produced for the jump itself
+}
+
+ExpressionTreeNode Operation::Delta::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // always the constant 0, regardless of the operand or its derivative
+}
+
+ExpressionTreeNode Operation::Square::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv u^2 = (2*u) * u'.
+    const ExpressionTreeNode twoU(new Operation::MultiplyConstant(2.0), children[0]);
+    return ExpressionTreeNode(new Operation::Multiply(),
+                              twoU, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Cube::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv u^3 = (3 * u^2) * u'.
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode threeUSquared(new Operation::MultiplyConstant(3.0), uSquared);
+    return ExpressionTreeNode(new Operation::Multiply(), threeUSquared, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Reciprocal::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv (1/u) = -(1 / u^2) * u'.
+    const ExpressionTreeNode uSquared(new Operation::Square(), children[0]);
+    const ExpressionTreeNode inverseSquare(new Operation::Reciprocal(), uSquared);
+    const ExpressionTreeNode minusInverseSquare(new Operation::Negate(), inverseSquare);
+    return ExpressionTreeNode(new Operation::Multiply(), minusInverseSquare, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::AddConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return childDerivs[0];  // (u + c)' = u': the operand's derivative is returned unchanged
+}
+
+ExpressionTreeNode Operation::MultiplyConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::MultiplyConstant(value),  // (c*u)' = c * u', with c = this operation's 'value'
+                              childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::PowerConstant::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // d/dv u^c = c * u^(c-1) * u', with c = this operation's 'value'.
+    const ExpressionTreeNode reducedPower(new Operation::PowerConstant(value-1), children[0]);
+    const ExpressionTreeNode scaledPower(new Operation::MultiplyConstant(value), reducedPower);
+    return ExpressionTreeNode(new Operation::Multiply(),
+                              scaledPower, childDerivs[0]);
+}
+
+ExpressionTreeNode Operation::Min::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // min(a,b)' = b'*s - a'*(s - 1), with s = step(a - b).
+    const ExpressionTreeNode difference(new Operation::Subtract(), children[0], children[1]);
+    const ExpressionTreeNode selector(new Operation::Step(), difference);
+    const ExpressionTreeNode takeSecond(new Operation::Multiply(), childDerivs[1], selector);
+    const ExpressionTreeNode selectorMinusOne(new Operation::AddConstant(-1), selector);
+    return ExpressionTreeNode(new Operation::Subtract(), takeSecond, ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], selectorMinusOne));
+}
+
+ExpressionTreeNode Operation::Max::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // max(a,b)' = a'*s - b'*(s - 1), with s = step(a - b).
+    const ExpressionTreeNode difference(new Operation::Subtract(), children[0], children[1]);
+    const ExpressionTreeNode selector(new Operation::Step(), difference);
+    const ExpressionTreeNode takeFirst(new Operation::Multiply(), childDerivs[0], selector);
+    const ExpressionTreeNode selectorMinusOne(new Operation::AddConstant(-1), selector);
+    return ExpressionTreeNode(new Operation::Subtract(), takeFirst, ExpressionTreeNode(new Operation::Multiply(), childDerivs[1], selectorMinusOne));
+}
+
+ExpressionTreeNode Operation::Abs::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // |u|' = u' * (2*step(u) - 1).
+    const ExpressionTreeNode stepU(new Operation::Step(), children[0]);
+    const ExpressionTreeNode twiceStep(new Operation::MultiplyConstant(2), stepU);
+    const ExpressionTreeNode signFactor(new Operation::AddConstant(-1), twiceStep);
+    return ExpressionTreeNode(new Operation::Multiply(), childDerivs[0], signFactor);
+}
+
+ExpressionTreeNode Operation::Floor::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // always the constant 0, regardless of the operand or its derivative
+}
+
+ExpressionTreeNode Operation::Ceil::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    return ExpressionTreeNode(new Operation::Constant(0.0));  // always the constant 0, regardless of the operand or its derivative
+}
+
+ExpressionTreeNode Operation::Select::differentiate(const std::vector<ExpressionTreeNode>& children, const std::vector<ExpressionTreeNode>& childDerivs, const std::string& variable) const {
+    // children[0] is passed through undifferentiated (presumably the selection condition); operands 1 and 2 are replaced by their derivatives.
+    vector<ExpressionTreeNode> selectArgs(1, children[0]);
+    selectArgs.push_back(childDerivs[1]);
+    selectArgs.push_back(childDerivs[2]);
+    return ExpressionTreeNode(new Operation::Select(), selectArgs);
+}
diff --git a/lib/colvars/lepton/src/ParsedExpression.cpp b/lib/colvars/lepton/src/ParsedExpression.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6effd060072f7a9e10261c49d8812da15e850f8b
--- /dev/null
+++ b/lib/colvars/lepton/src/ParsedExpression.cpp
@@ -0,0 +1,351 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/ParsedExpression.h"
+#include "lepton/CompiledExpression.h"
+#include "lepton/ExpressionProgram.h"
+#include "lepton/Operation.h"
+#include <limits>
+#include <vector>
+
+using namespace Lepton;
+using namespace std;
+
+// Default constructor: the root node is copied from a default-constructed ExpressionTreeNode.
+// NOTE(review): presumably such a node has no operation, which getRootNode() rejects - confirm in ExpressionTreeNode.
+ParsedExpression::ParsedExpression() : rootNode(ExpressionTreeNode()) {
+}
+
+// Wraps an existing expression tree; the given root node is copied into this object.
+ParsedExpression::ParsedExpression(const ExpressionTreeNode& rootNode) : rootNode(rootNode) {
+}
+
+// Returns the root node of the expression tree.
+// Throws Exception when the root node carries no operation, i.e. the expression was never
+// given a real tree (presumably the state left by the default constructor).
+const ExpressionTreeNode& ParsedExpression::getRootNode() const {
+    // NOTE(review): this takes the address of a reference and compares it to NULL. If
+    // getOperation() dereferences a null pointer to form that reference, the behaviour is
+    // undefined and an optimizing compiler may drop the check - confirm against
+    // ExpressionTreeNode and consider a dedicated "has operation" query there.
+    if (&rootNode.getOperation() == NULL)
+        throw Exception("Illegal call to an uninitialized ParsedExpression");
+    return rootNode;
+}
+
+// Evaluates the expression with an empty variable table.
+double ParsedExpression::evaluate() const {
+    const map<string, double> noVariables;
+    return evaluate(getRootNode(), noVariables);
+}
+
+// Evaluates the expression, passing the given name -> value table to every operation.
+double ParsedExpression::evaluate(const map<string, double>& variables) const {
+    const ExpressionTreeNode& root = getRootNode();
+    return evaluate(root, variables);
+}
+
+// Recursively evaluates a subtree: every child is evaluated first (in order), then the node's
+// operation is applied to the resulting argument array.
+double ParsedExpression::evaluate(const ExpressionTreeNode& node, const map<string, double>& variables) {
+    const vector<ExpressionTreeNode>& kids = node.getChildren();
+    // Always allocate at least one slot so that taking the address of element 0 is valid for leaves.
+    vector<double> args(kids.empty() ? 1 : kids.size());
+    for (vector<ExpressionTreeNode>::size_type i = 0; i < kids.size(); ++i)
+        args[i] = evaluate(kids[i], variables);
+    return node.getOperation().evaluate(&args[0], variables);
+}
+
+// Returns an optimized copy of this expression: constant subexpressions are folded once, then
+// substituteSimplerExpression() is applied repeatedly until the tree stops changing.
+ParsedExpression ParsedExpression::optimize() const {
+    ExpressionTreeNode tree = precalculateConstantSubexpressions(getRootNode());
+    bool changed;
+    do {
+        ExpressionTreeNode candidate = substituteSimplerExpression(tree);
+        changed = !(candidate == tree);
+        if (changed)
+            tree = candidate;
+    } while (changed);
+    return ParsedExpression(tree);
+}
+
+// Like optimize(), but first replaces every variable found in `variables` by its constant
+// value, so that more of the tree can be folded and simplified.
+ParsedExpression ParsedExpression::optimize(const map<string, double>& variables) const {
+    ExpressionTreeNode tree = preevaluateVariables(getRootNode(), variables);
+    tree = precalculateConstantSubexpressions(tree);
+    bool changed;
+    do {
+        ExpressionTreeNode candidate = substituteSimplerExpression(tree);
+        changed = !(candidate == tree);
+        if (changed)
+            tree = candidate;
+    } while (changed);
+    return ParsedExpression(tree);
+}
+
+// Returns a copy of the subtree in which each Variable node whose name appears in `variables`
+// is replaced by a Constant holding the mapped value. Variables that are not listed are
+// returned as they are; all other nodes are rebuilt from a clone of their operation.
+ExpressionTreeNode ParsedExpression::preevaluateVariables(const ExpressionTreeNode& node, const map<string, double>& variables) {
+    const Operation& op = node.getOperation();
+    if (op.getId() == Operation::VARIABLE) {
+        const Operation::Variable& var = dynamic_cast<const Operation::Variable&>(op);
+        const map<string, double>::const_iterator found = variables.find(var.getName());
+        if (found != variables.end())
+            return ExpressionTreeNode(new Operation::Constant(found->second));
+        return node;
+    }
+    const vector<ExpressionTreeNode>& original = node.getChildren();
+    vector<ExpressionTreeNode> rewritten(original.size());
+    for (vector<ExpressionTreeNode>::size_type i = 0; i < original.size(); ++i)
+        rewritten[i] = preevaluateVariables(original[i], variables);
+    return ExpressionTreeNode(op.clone(), rewritten);
+}
+
+// Folds constant subtrees bottom-up. A node is replaced by a single Constant when it is
+// neither a VARIABLE nor a CUSTOM operation and all of its (already folded) children are
+// constants; otherwise the node is returned rebuilt around its folded children.
+ExpressionTreeNode ParsedExpression::precalculateConstantSubexpressions(const ExpressionTreeNode& node) {
+    const vector<ExpressionTreeNode>& original = node.getChildren();
+    vector<ExpressionTreeNode> folded(original.size());
+    for (vector<ExpressionTreeNode>::size_type i = 0; i < original.size(); ++i)
+        folded[i] = precalculateConstantSubexpressions(original[i]);
+    const Operation& op = node.getOperation();
+    ExpressionTreeNode rebuilt(op.clone(), folded);
+    bool foldable = !(op.getId() == Operation::VARIABLE || op.getId() == Operation::CUSTOM);
+    for (vector<ExpressionTreeNode>::size_type i = 0; foldable && i < folded.size(); ++i)
+        foldable = (folded[i].getOperation().getId() == Operation::CONSTANT);
+    if (!foldable)
+        return rebuilt;
+    // No variables can be involved at this point, so evaluate with an empty variable table.
+    return ExpressionTreeNode(new Operation::Constant(evaluate(rebuilt, map<string, double>())));
+}
+
+// Performs one pass of algebraic simplification over a subtree.
+//
+// The children are simplified first (recursively); then, depending on the id of this node's
+// operation, the first matching rewrite rule in the corresponding case is applied and its
+// result returned. The order of the rules inside each case matters, because earlier rules
+// shadow later ones. If no rule applies, the node is rebuilt from a clone of its operation
+// and the simplified children.
+//
+// getConstantValue() yields NaN for any child that is not a Constant node, so comparisons such
+// as `first == 0.0` are false for non-constants and the idiom `first == first` is true exactly
+// when the child is a constant whose value is not NaN.
+ExpressionTreeNode ParsedExpression::substituteSimplerExpression(const ExpressionTreeNode& node) {
+    vector<ExpressionTreeNode> children(node.getChildren().size());
+    for (int i = 0; i < (int) children.size(); i++)
+        children[i] = substituteSimplerExpression(node.getChildren()[i]);
+    switch (node.getOperation().getId()) {
+        case Operation::ADD:
+        {
+            double first = getConstantValue(children[0]);
+            double second = getConstantValue(children[1]);
+            if (first == 0.0) // Add 0
+                return children[1];
+            if (second == 0.0) // Add 0
+                return children[0];
+            if (first == first) // Add a constant (the self-comparison is a "not NaN" test)
+                return ExpressionTreeNode(new Operation::AddConstant(first), children[1]);
+            if (second == second) // Add a constant
+                return ExpressionTreeNode(new Operation::AddConstant(second), children[0]);
+            if (children[1].getOperation().getId() == Operation::NEGATE) // a+(-b) = a-b
+                return ExpressionTreeNode(new Operation::Subtract(), children[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // (-a)+b = b-a
+                return ExpressionTreeNode(new Operation::Subtract(), children[1], children[0].getChildren()[0]);
+            break;
+        }
+        case Operation::SUBTRACT:
+        {
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Constant(0.0)); // Subtracting anything from itself is 0
+            double first = getConstantValue(children[0]);
+            if (first == 0.0) // Subtract from 0
+                return ExpressionTreeNode(new Operation::Negate(), children[1]);
+            double second = getConstantValue(children[1]);
+            if (second == 0.0) // Subtract 0
+                return children[0];
+            if (second == second) // Subtract a constant
+                return ExpressionTreeNode(new Operation::AddConstant(-second), children[0]);
+            if (children[1].getOperation().getId() == Operation::NEGATE) // a-(-b) = a+b
+                return ExpressionTreeNode(new Operation::Add(), children[0], children[1].getChildren()[0]);
+            break;
+        }
+        case Operation::MULTIPLY:
+        {
+            double first = getConstantValue(children[0]);
+            double second = getConstantValue(children[1]);
+            if (first == 0.0 || second == 0.0) // Multiply by 0
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (first == 1.0) // Multiply by 1
+                return children[1];
+            if (second == 1.0) // Multiply by 1
+                return children[0];
+            if (children[0].getOperation().getId() == Operation::CONSTANT) { // Multiply by a constant
+                if (children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(first*dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(first), children[1]);
+            }
+            if (children[1].getOperation().getId() == Operation::CONSTANT) { // Multiply by a constant
+                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(second*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(second), children[0]);
+            }
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[1].getOperation())->getValue()), children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Multiply(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0].getChildren()[0], children[1]));
+            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a*(1/b) = a/b
+                return ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::RECIPROCAL) // (1/a)*b = b/a
+                return ExpressionTreeNode(new Operation::Divide(), children[1], children[0].getChildren()[0]);
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Square(), children[0]); // x*x = square(x)
+            if (children[0].getOperation().getId() == Operation::SQUARE && children[0].getChildren()[0] == children[1])
+                return ExpressionTreeNode(new Operation::Cube(), children[1]); // x*x*x = cube(x)
+            if (children[1].getOperation().getId() == Operation::SQUARE && children[1].getChildren()[0] == children[0])
+                return ExpressionTreeNode(new Operation::Cube(), children[0]); // x*x*x = cube(x)
+            break;
+        }
+        case Operation::DIVIDE:
+        {
+            if (children[0] == children[1])
+                return ExpressionTreeNode(new Operation::Constant(1.0)); // Dividing anything by itself is 1
+            double numerator = getConstantValue(children[0]);
+            if (numerator == 0.0) // 0 divided by something
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (numerator == 1.0) // 1 divided by something
+                return ExpressionTreeNode(new Operation::Reciprocal(), children[1]);
+            double denominator = getConstantValue(children[1]);
+            if (denominator == 1.0) // Divide by 1
+                return children[0];
+            if (children[1].getOperation().getId() == Operation::CONSTANT) {
+                if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a divide into one multiply
+                    return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()/denominator), children[0].getChildren()[0]);
+                return ExpressionTreeNode(new Operation::MultiplyConstant(1.0/denominator), children[0]); // Replace a divide with a multiply
+            }
+            if (children[0].getOperation().getId() == Operation::NEGATE && children[1].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1].getChildren()[0]);
+            if (children[1].getOperation().getId() == Operation::NEGATE && children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Negate the constant
+                return ExpressionTreeNode(new Operation::Divide(), ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]), children[1].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0].getChildren()[0], children[1]));
+            if (children[1].getOperation().getId() == Operation::NEGATE) // Pull the negation out so it can possibly be optimized further
+                return ExpressionTreeNode(new Operation::Negate(), ExpressionTreeNode(new Operation::Divide(), children[0], children[1].getChildren()[0]));
+            if (children[1].getOperation().getId() == Operation::RECIPROCAL) // a/(1/b) = a*b
+                return ExpressionTreeNode(new Operation::Multiply(), children[0], children[1].getChildren()[0]);
+            break;
+        }
+        case Operation::POWER:
+        {
+            double base = getConstantValue(children[0]);
+            if (base == 0.0) // 0 to any power is 0
+                return ExpressionTreeNode(new Operation::Constant(0.0));
+            if (base == 1.0) // 1 to any power is 1
+                return ExpressionTreeNode(new Operation::Constant(1.0));
+            double exponent = getConstantValue(children[1]);
+            if (exponent == 0.0) // x^0 = 1
+                return ExpressionTreeNode(new Operation::Constant(1.0));
+            if (exponent == 1.0) // x^1 = x
+                return children[0];
+            if (exponent == -1.0) // x^-1 = recip(x)
+                return ExpressionTreeNode(new Operation::Reciprocal(), children[0]);
+            if (exponent == 2.0) // x^2 = square(x)
+                return ExpressionTreeNode(new Operation::Square(), children[0]);
+            if (exponent == 3.0) // x^3 = cube(x)
+                return ExpressionTreeNode(new Operation::Cube(), children[0]);
+            if (exponent == 0.5) // x^0.5 = sqrt(x)
+                return ExpressionTreeNode(new Operation::Sqrt(), children[0]);
+            if (exponent == exponent) // Constant power
+                return ExpressionTreeNode(new Operation::PowerConstant(exponent), children[0]);
+            break;
+        }
+        case Operation::NEGATE:
+        {
+            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine a multiply and a negate into a single multiply
+                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::CONSTANT) // Negate a constant
+                return ExpressionTreeNode(new Operation::Constant(-getConstantValue(children[0])));
+            if (children[0].getOperation().getId() == Operation::NEGATE) // The two negations cancel
+                return children[0].getChildren()[0];
+            break;
+        }
+        case Operation::MULTIPLY_CONSTANT:
+        {
+            if (children[0].getOperation().getId() == Operation::MULTIPLY_CONSTANT) // Combine two multiplies into a single one
+                return ExpressionTreeNode(new Operation::MultiplyConstant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*dynamic_cast<const Operation::MultiplyConstant*>(&children[0].getOperation())->getValue()), children[0].getChildren()[0]);
+            if (children[0].getOperation().getId() == Operation::CONSTANT) // Multiply two constants
+                return ExpressionTreeNode(new Operation::Constant(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()*getConstantValue(children[0])));
+            if (children[0].getOperation().getId() == Operation::NEGATE) // Combine a multiply and a negate into a single multiply
+                return ExpressionTreeNode(new Operation::MultiplyConstant(-dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()), children[0].getChildren()[0]);
+            break;
+        }
+        default:
+        {
+            // If operation ID is not one of the above,
+            // we don't substitute a simpler expression.
+            break;
+        }
+
+    }
+    // No rule matched: keep this operation, but on top of the simplified children.
+    return ExpressionTreeNode(node.getOperation().clone(), children);
+}
+
+// Returns a new expression that is the derivative of this one with respect to `variable`.
+ParsedExpression ParsedExpression::differentiate(const string& variable) const {
+    const ExpressionTreeNode derivative = differentiate(getRootNode(), variable);
+    return ParsedExpression(derivative);
+}
+
+// Differentiates a subtree: the derivatives of all children are computed first, then the
+// node's operation combines the children and their derivatives into its own derivative.
+ExpressionTreeNode ParsedExpression::differentiate(const ExpressionTreeNode& node, const string& variable) {
+    const vector<ExpressionTreeNode>& kids = node.getChildren();
+    vector<ExpressionTreeNode> kidDerivs(kids.size());
+    for (vector<ExpressionTreeNode>::size_type i = 0; i < kids.size(); ++i)
+        kidDerivs[i] = differentiate(kids[i], variable);
+    return node.getOperation().differentiate(kids, kidDerivs, variable);
+}
+
+// Returns the value stored in a Constant node, or a quiet NaN when the node is not a constant.
+double ParsedExpression::getConstantValue(const ExpressionTreeNode& node) {
+    const Operation& op = node.getOperation();
+    if (op.getId() != Operation::CONSTANT)
+        return numeric_limits<double>::quiet_NaN();
+    return dynamic_cast<const Operation::Constant&>(op).getValue();
+}
+
+// Builds an ExpressionProgram from this expression.
+ExpressionProgram ParsedExpression::createProgram() const {
+    ExpressionProgram program(*this);
+    return program;
+}
+
+// Builds a CompiledExpression from this expression.
+CompiledExpression ParsedExpression::createCompiledExpression() const {
+    CompiledExpression compiled(*this);
+    return compiled;
+}
+
+// Returns a copy of this expression in which every variable listed in `replacements`
+// (old name -> new name) has been renamed.
+ParsedExpression ParsedExpression::renameVariables(const map<string, string>& replacements) const {
+    const ExpressionTreeNode renamedRoot = renameNodeVariables(getRootNode(), replacements);
+    return ParsedExpression(renamedRoot);
+}
+
+// Recursive worker for renameVariables(). A Variable node whose name is a key of
+// `replacements` becomes a fresh Variable with the mapped name; every other node is rebuilt
+// from a clone of its operation and its recursively processed children.
+ExpressionTreeNode ParsedExpression::renameNodeVariables(const ExpressionTreeNode& node, const map<string, string>& replacements) {
+    const Operation& op = node.getOperation();
+    if (op.getId() == Operation::VARIABLE) {
+        const map<string, string>::const_iterator hit = replacements.find(op.getName());
+        if (hit != replacements.end())
+            return ExpressionTreeNode(new Operation::Variable(hit->second));
+    }
+    const vector<ExpressionTreeNode>& original = node.getChildren();
+    vector<ExpressionTreeNode> renamed;
+    for (vector<ExpressionTreeNode>::const_iterator child = original.begin(); child != original.end(); ++child)
+        renamed.push_back(renameNodeVariables(*child, replacements));
+    return ExpressionTreeNode(op.clone(), renamed);
+}
+
+// Writes a textual form of an expression subtree.
+//   infix operator with two children:  (lhs)name(rhs)
+//   infix operator with one child:     (child)name
+//   anything else:                     name  or  name(arg0, arg1, ...)
+ostream& Lepton::operator<<(ostream& out, const ExpressionTreeNode& node) {
+    const Operation& op = node.getOperation();
+    const vector<ExpressionTreeNode>& kids = node.getChildren();
+    const bool infix = op.isInfixOperator();
+    if (infix && kids.size() == 2) {
+        out << "(" << kids[0] << ")" << op.getName() << "(" << kids[1] << ")";
+        return out;
+    }
+    if (infix && kids.size() == 1) {
+        out << "(" << kids[0] << ")" << op.getName();
+        return out;
+    }
+    out << op.getName();
+    if (!kids.empty()) {
+        out << "(";
+        for (vector<ExpressionTreeNode>::size_type i = 0; i < kids.size(); ++i) {
+            if (i > 0)
+                out << ", ";
+            out << kids[i];
+        }
+        out << ")";
+    }
+    return out;
+}
+
+// Writes a ParsedExpression by streaming its root node.
+ostream& Lepton::operator<<(ostream& out, const ParsedExpression& exp) {
+    return out << exp.getRootNode();
+}
diff --git a/lib/colvars/lepton/src/Parser.cpp b/lib/colvars/lepton/src/Parser.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b19d7370de6baa44217e04cc48a7ad1ede72df7
--- /dev/null
+++ b/lib/colvars/lepton/src/Parser.cpp
@@ -0,0 +1,406 @@
+/* -------------------------------------------------------------------------- *
+ *                                   Lepton                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the Lepton expression parser originating from              *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "lepton/Parser.h"
+#include "lepton/CustomFunction.h"
+#include "lepton/Exception.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/Operation.h"
+#include "lepton/ParsedExpression.h"
+#include <cctype>
+#include <iostream>
+
+using namespace Lepton;
+using namespace std;
+
+// Characters that may start or continue the digit part of a numeric literal.
+static const string Digits = "0123456789";
+// The binary operator characters. The three arrays below are parallel to this string:
+// entry i describes the operator Operators[i] (the parser indexes them with Operators.find()).
+static const string Operators = "+-*/^";
+// Associativity flag per operator, in Operators order (only '^' is marked false).
+static const bool LeftAssociative[] = {true, true, true, true, false};
+// Precedence level per operator, in Operators order: + and - share 0, * and / share 1, ^ is 3.
+// Level 2 is not assigned to any binary operator; unary minus parses its operand at level 2.
+static const int Precedence[] = {0, 0, 1, 1, 3};
+// Operation id per operator, in Operators order.
+static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
+
+// A single lexical token produced by the tokenizer: the exact text taken from the expression
+// together with its category.
+class Lepton::ParseToken {
+public:
+    enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
+
+    ParseToken(string tokenText, Type tokenType) : text(tokenText), type(tokenType) {
+    }
+    // Category of this token.
+    Type getType() const {
+        return type;
+    }
+    // Text of this token as it appeared in the expression.
+    const string& getText() const {
+        return text;
+    }
+private:
+    string text;
+    Type type;
+};
+
+// Returns `expression` with leading and trailing whitespace removed.
+// An empty or all-whitespace input yields the empty string.
+string Parser::trim(const string& expression) {
+    const int length = (int) expression.size();
+
+    // isspace() is only defined for values representable as unsigned char (or EOF), so each
+    // character is converted first; passing a negative plain char (non-ASCII byte) is undefined.
+    int start = 0;
+    while (start < length && isspace(static_cast<unsigned char>(expression[start])))
+        start++;
+    int end = length-1;
+    while (end > start && isspace(static_cast<unsigned char>(expression[end])))
+        end--;
+
+    // When nothing but whitespace was found, start == length and end == length-1, so the
+    // requested count is 0 and substr() returns "". No separate all-whitespace check is
+    // needed: if start == end, that character was already rejected by the first loop.
+    return expression.substr(start, end-start+1);
+}
+
+// Extracts the token that begins at index `start` of `expression`.
+//
+// The token text is always the exact slice of the input it covers, so a caller can advance by
+// the text length. Recognized, in this order: '(' , ')' , ',' , a single operator character,
+// a run of whitespace, a number (digits with at most one '.', and at most one exponent
+// marker e/E optionally followed by a sign), and finally a name. A name that is terminated by
+// '(' is a Function token whose text includes that '('; otherwise it is a Variable token.
+//
+// `start` must be a valid index into `expression`.
+ParseToken Parser::getNextToken(const string& expression, int start) {
+    char c = expression[start];
+    if (c == '(')
+        return ParseToken("(", ParseToken::LeftParen);
+    if (c == ')')
+        return ParseToken(")", ParseToken::RightParen);
+    if (c == ',')
+        return ParseToken(",", ParseToken::Comma);
+    if (Operators.find(c) != string::npos)
+        return ParseToken(string(1, c), ParseToken::Operator);
+    // isspace() requires a value representable as unsigned char; a negative plain char
+    // (non-ASCII byte) would be undefined behaviour, hence the casts below.
+    if (isspace(static_cast<unsigned char>(c))) {
+        // White space
+
+        for (int pos = start+1; pos < (int) expression.size(); pos++) {
+            if (!isspace(static_cast<unsigned char>(expression[pos])))
+                return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
+        }
+        return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
+    }
+    if (c == '.' || Digits.find(c) != string::npos) {
+        // A number
+
+        bool foundDecimal = (c == '.');
+        bool foundExp = false;
+        int pos;
+        for (pos = start+1; pos < (int) expression.size(); pos++) {
+            c = expression[pos];
+            if (Digits.find(c) != string::npos)
+                continue;
+            if (c == '.' && !foundDecimal) {
+                foundDecimal = true;
+                continue;
+            }
+            if ((c == 'e' || c == 'E') && !foundExp) {
+                foundExp = true;
+                // Swallow a sign that directly follows the exponent marker.
+                if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
+                    pos++;
+                continue;
+            }
+            break;
+        }
+        return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
+    }
+
+    // A variable, function, or left parenthesis
+
+    for (int pos = start; pos < (int) expression.size(); pos++) {
+        c = expression[pos];
+        if (c == '(')
+            return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
+        if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(static_cast<unsigned char>(c)))
+            return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
+    }
+    return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
+}
+
+// Splits an expression into tokens, left to right. Whitespace tokens are consumed but not
+// included in the returned list.
+vector<ParseToken> Parser::tokenize(const string& expression) {
+    vector<ParseToken> result;
+    const int length = (int) expression.size();
+    for (int cursor = 0; cursor < length; ) {
+        const ParseToken next = getNextToken(expression, cursor);
+        if (next.getType() != ParseToken::Whitespace)
+            result.push_back(next);
+        // Each token's text is the slice of input it covers, so its length is the step size.
+        cursor += (int) next.getText().size();
+    }
+    return result;
+}
+
+// Parses an expression that uses no custom functions.
+ParsedExpression Parser::parse(const string& expression) {
+    const map<string, CustomFunction*> noCustomFunctions;
+    return parse(expression, noCustomFunctions);
+}
+
+// Parses an expression of the form  "primary; name1 = def1; name2 = def2; ...".
+//
+// The text is split at ';'. Everything before the first ';' is the primary expression; each
+// later non-empty piece must be a "name = definition" subexpression. The definitions are
+// parsed from the last one to the first, and each one may refer to the names parsed before
+// it. Any Exception raised while parsing is rethrown with the full expression text prepended.
+ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
+    try {
+        // Peel the subexpression clauses off the right-hand end, one ';' at a time.
+
+        string mainText = expression;
+        vector<string> clauses;
+        for (string::size_type semi = mainText.find_last_of(';'); semi != string::npos; semi = mainText.find_last_of(';')) {
+            const string clause = trim(mainText.substr(semi+1));
+            if (clause.size() > 0)
+                clauses.push_back(clause);
+            mainText = mainText.substr(0, semi);
+        }
+
+        // Parse every clause into a named expression tree.
+
+        map<string, ExpressionTreeNode> subexpDefs;
+        for (vector<string>::const_iterator clause = clauses.begin(); clause != clauses.end(); ++clause) {
+            const string::size_type equalsPos = clause->find('=');
+            if (equalsPos == string::npos)
+                throw Exception("subexpression does not specify a name");
+            const string name = trim(clause->substr(0, equalsPos));
+            if (name.size() == 0)
+                throw Exception("subexpression does not specify a name");
+            vector<ParseToken> clauseTokens = tokenize(clause->substr(equalsPos+1));
+            int cursor = 0;
+            subexpDefs[name] = parsePrecedence(clauseTokens, cursor, customFunctions, subexpDefs, 0);
+            if (cursor != clauseTokens.size())
+                throw Exception("unexpected text at end of subexpression: "+clauseTokens[cursor].getText());
+        }
+
+        // Finally parse the primary expression, with all subexpressions available.
+
+        vector<ParseToken> mainTokens = tokenize(mainText);
+        int cursor = 0;
+        ExpressionTreeNode root = parsePrecedence(mainTokens, cursor, customFunctions, subexpDefs, 0);
+        if (cursor != mainTokens.size())
+            throw Exception("unexpected text at end of expression: "+mainTokens[cursor].getText());
+        return ParsedExpression(root);
+    }
+    catch (Exception& ex) {
+        throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
+    }
+}
+
+// Parse one operand starting at tokens[pos], then fold in each following binary operator whose precedence is
+// at least `precedence`. Advances `pos` past every token consumed; throws Exception on malformed input.
+ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
+            const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
+    if (pos == (int) tokens.size()) throw Exception("unexpected end of expression");
+
+    // Parse the next value (number, variable, function, parenthesized expression, or unary minus).
+    ParseToken token = tokens[pos];
+    ExpressionTreeNode result;
+    if (token.getType() == ParseToken::Number) {
+        double value = 0.0;
+        if (!(stringstream(token.getText()) >> value)) throw Exception("invalid number: "+token.getText());
+        result = ExpressionTreeNode(new Operation::Constant(value));
+        pos++;
+    }
+    else if (token.getType() == ParseToken::Variable) {
+        map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
+        if (subexp == subexpressionDefs.end()) {
+            Operation* op = new Operation::Variable(token.getText());
+            result = ExpressionTreeNode(op);
+        }
+        else
+            result = subexp->second; // the name is a previously defined subexpression: substitute its tree
+        pos++;
+    }
+    else if (token.getType() == ParseToken::LeftParen) {
+        pos++;
+        result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
+        if (pos == (int) tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+            throw Exception("unbalanced parentheses");
+        pos++;
+    }
+    else if (token.getType() == ParseToken::Function) {
+        pos++;
+        vector<ExpressionTreeNode> args;
+        bool moreArgs;
+        do {
+            args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
+            moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
+            if (moreArgs)
+                pos++;
+        } while (moreArgs);
+        if (pos == (int) tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
+            throw Exception("unbalanced parentheses");
+        pos++;
+        Operation* op = getFunctionOperation(token.getText(), customFunctions);
+        try {
+            result = ExpressionTreeNode(op, args);
+        }
+        catch (...) {
+            delete op; // node construction failed, so release op here before rethrowing
+            throw;
+        }
+    }
+    else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
+        pos++;
+        ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
+        result = ExpressionTreeNode(new Operation::Negate(), toNegate);
+    }
+    else
+        throw Exception("unexpected token: "+token.getText());
+
+    // Now deal with the following binary operators. An operator binding more loosely than `precedence` is left for the
+    // caller; a left-associative operator parses its right operand at opPrecedence+1 so an equal-precedence operator ends it.
+    while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
+        token = tokens[pos];
+        int opIndex = (int) Operators.find(token.getText());
+        if (opIndex < 0) throw Exception("unknown operator: "+token.getText());
+        int opPrecedence = Precedence[opIndex];
+        if (opPrecedence < precedence) return result;
+        pos++;
+        ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
+        Operation* op = getOperatorOperation(token.getText());
+        try {
+            result = ExpressionTreeNode(op, result, arg);
+        }
+        catch (...) {
+            delete op; // node construction failed, so release op here before rethrowing
+            throw;
+        }
+    }
+    return result;
+}
+
+// Create the Operation object for a binary operator symbol.
+// Returns a newly allocated Operation; throws Exception("unknown operator") if no case matches.
+Operation* Parser::getOperatorOperation(const std::string& name) {
+    // The position returned by Operators.find(name) is used as the index into OperationId.
+    switch (OperationId[Operators.find(name)]) {
+        case Operation::ADD:      return new Operation::Add();
+        case Operation::SUBTRACT: return new Operation::Subtract();
+        case Operation::MULTIPLY: return new Operation::Multiply();
+        case Operation::DIVIDE:   return new Operation::Divide();
+        case Operation::POWER:    return new Operation::Power();
+        default:
+            break;
+    }
+    // Reached only when the id found above has no matching case.
+    throw Exception("unknown operator");
+}
+
+// Create the Operation for a function-call token. `name` is the token text with one trailing character
+// (presumably the opening parenthesis) still attached; that character is dropped before the lookup.
+// Returns a newly allocated Operation; throws Exception when the name matches no custom or built-in function.
+Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
+    // Name -> id table for the built-in functions, filled in on the first call.
+    static map<string, Operation::Id> builtinIds;
+    if (builtinIds.empty()) {
+        // roots, exponentials and simple powers
+        builtinIds["sqrt"] = Operation::SQRT;
+        builtinIds["exp"] = Operation::EXP;
+        builtinIds["log"] = Operation::LOG;
+        builtinIds["square"] = Operation::SQUARE;
+        builtinIds["cube"] = Operation::CUBE;
+        builtinIds["recip"] = Operation::RECIPROCAL;
+
+        // trigonometric
+        builtinIds["sin"] = Operation::SIN;
+        builtinIds["cos"] = Operation::COS;
+        builtinIds["sec"] = Operation::SEC;
+        builtinIds["csc"] = Operation::CSC;
+        builtinIds["tan"] = Operation::TAN;
+        builtinIds["cot"] = Operation::COT;
+
+        // inverse trigonometric
+        builtinIds["asin"] = Operation::ASIN;
+        builtinIds["acos"] = Operation::ACOS;
+        builtinIds["atan"] = Operation::ATAN;
+
+        // hyperbolic
+        builtinIds["sinh"] = Operation::SINH;
+        builtinIds["cosh"] = Operation::COSH;
+        builtinIds["tanh"] = Operation::TANH;
+
+        // error functions
+        builtinIds["erf"] = Operation::ERF;
+        builtinIds["erfc"] = Operation::ERFC;
+
+        // piecewise and selection
+        builtinIds["step"] = Operation::STEP;
+        builtinIds["delta"] = Operation::DELTA;
+        builtinIds["min"] = Operation::MIN;
+        builtinIds["max"] = Operation::MAX;
+        builtinIds["abs"] = Operation::ABS;
+        builtinIds["select"] = Operation::SELECT;
+
+        // rounding
+        builtinIds["floor"] = Operation::FLOOR;
+        builtinIds["ceil"] = Operation::CEIL;
+    }
+
+    // Strip the trailing character to obtain the bare function name.
+    string functionName = name.substr(0, name.size()-1);
+
+    // First check custom functions
+    // (a custom function sharing a built-in's name therefore takes priority).
+    map<string, CustomFunction*>::const_iterator customEntry = customFunctions.find(functionName);
+    if (customEntry != customFunctions.end())
+        return new Operation::Custom(functionName, customEntry->second->clone());
+
+    // Now try standard functions.
+    map<string, Operation::Id>::const_iterator builtinEntry = builtinIds.find(functionName);
+    if (builtinEntry == builtinIds.end())
+        throw Exception("unknown function: "+functionName);
+
+    // Instantiate the concrete Operation subclass for the id that was found.
+    switch (builtinEntry->second) {
+        // roots, exponentials and simple powers
+        case Operation::SQRT:       return new Operation::Sqrt();
+        case Operation::EXP:        return new Operation::Exp();
+        case Operation::LOG:        return new Operation::Log();
+        case Operation::SQUARE:     return new Operation::Square();
+        case Operation::CUBE:       return new Operation::Cube();
+        case Operation::RECIPROCAL: return new Operation::Reciprocal();
+        // trigonometric
+        case Operation::SIN:        return new Operation::Sin();
+        case Operation::COS:        return new Operation::Cos();
+        case Operation::SEC:        return new Operation::Sec();
+        case Operation::CSC:        return new Operation::Csc();
+        case Operation::TAN:        return new Operation::Tan();
+        case Operation::COT:        return new Operation::Cot();
+        // inverse trigonometric
+        case Operation::ASIN:       return new Operation::Asin();
+        case Operation::ACOS:       return new Operation::Acos();
+        case Operation::ATAN:       return new Operation::Atan();
+        // hyperbolic
+        case Operation::SINH:       return new Operation::Sinh();
+        case Operation::COSH:       return new Operation::Cosh();
+        case Operation::TANH:       return new Operation::Tanh();
+        // error functions
+        case Operation::ERF:        return new Operation::Erf();
+        case Operation::ERFC:       return new Operation::Erfc();
+        // piecewise and selection
+        case Operation::STEP:       return new Operation::Step();
+        case Operation::DELTA:      return new Operation::Delta();
+        case Operation::MIN:        return new Operation::Min();
+        case Operation::MAX:        return new Operation::Max();
+        case Operation::ABS:        return new Operation::Abs();
+        case Operation::SELECT:     return new Operation::Select();
+        // rounding
+        case Operation::FLOOR:      return new Operation::Floor();
+        case Operation::CEIL:       return new Operation::Ceil();
+        default:
+            break;
+    }
+    // Reached only for an id that is in the table but has no case above.
+    throw Exception("unknown function");
+}
diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux
deleted file mode 100644
index 2991b9643d8951f6bddfa42d99f28cd039ec29bb..0000000000000000000000000000000000000000
--- a/lib/gpu/Makefile.linux
+++ /dev/null
@@ -1,52 +0,0 @@
-# /* ----------------------------------------------------------------------   
-#  Generic Linux Makefile for CUDA 
-#     - Change CUDA_ARCH for your GPU
-# ------------------------------------------------------------------------- */
-
-# which file will be copied to Makefile.lammps
-
-EXTRAMAKE = Makefile.lammps.standard
-
-ifeq ($(CUDA_HOME),)
-CUDA_HOME = /opt/cuda
-endif
-
-NVCC = nvcc
-
-# Tesla CUDA
-#CUDA_ARCH = -arch=sm_21
-# newer CUDA
-#CUDA_ARCH = -arch=sm_13
-# older CUDA
-#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
-CUDA_ARCH = -arch=sm_61
-
-# this setting should match LAMMPS Makefile
-# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
-
-LMP_INC = -DLAMMPS_SMALLBIG
-
-# precision for GPU calculations
-# -D_SINGLE_SINGLE  # Single precision for all calculations
-# -D_DOUBLE_DOUBLE  # Double precision for all calculations
-# -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double
-
-CUDA_PRECISION = -D_SINGLE_DOUBLE
-
-CUDA_INCLUDE = -I$(CUDA_HOME)/include
-CUDA_LIB = -L$(CUDA_HOME)/lib64
-CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -Xcompiler -fPIC
-
-CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
-CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
-
-BIN_DIR = ./
-OBJ_DIR = ./
-LIB_DIR = ./
-AR = ar
-BSH = /bin/sh
-
-CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
-
-include Nvidia.makefile
-
diff --git a/lib/kim/Install.py b/lib/kim/Install.py
index aa244ee6eabd0f1296c1f39fd74639aab9f021dc..0e873889546d198398f22d3fc2df37d0bc3fec06 100644
--- a/lib/kim/Install.py
+++ b/lib/kim/Install.py
@@ -21,7 +21,7 @@ Syntax from lib dir: python Install.py -b -v version  -a kim-name
 specify one or more options, order does not matter
 
   -v = version of KIM API library to use
-       default = kim-api-v1.8.2 (current as of June 2017)
+       default = kim-api-v1.9.2 (current as of Oct 2017)
   -b = download and build base KIM API library with example Models
        this will delete any previous installation in the current folder
   -n = do NOT download and build base KIM API library.
@@ -109,7 +109,7 @@ nargs = len(args)
 if nargs == 0: error()
 
 thisdir = os.environ['PWD']
-version = "kim-api-v1.8.2"
+version = "kim-api-v1.9.2"
 
 buildflag = False
 everythingflag = False
@@ -150,7 +150,7 @@ while iarg < len(args):
   else: error()
 
 thisdir = os.path.abspath(thisdir)
-url = "https://s3.openkim.org/kim-api/%s.tgz" % version
+url = "https://s3.openkim.org/kim-api/%s.txz" % version
 
 # set KIM API directory
 
@@ -199,9 +199,9 @@ if buildflag:
   # download entire kim-api tarball
 
   print("Downloading kim-api tarball ...")
-  geturl(url,"%s/%s.tgz" % (thisdir,version))
+  geturl(url,"%s/%s.txz" % (thisdir,version))
   print("Unpacking kim-api tarball ...")
-  cmd = 'cd "%s"; rm -rf "%s"; tar -xzvf %s.tgz' % (thisdir,version,version)
+  cmd = 'cd "%s"; rm -rf "%s"; tar -xJvf %s.txz' % (thisdir,version,version)
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
   # configure kim-api
@@ -211,18 +211,6 @@ if buildflag:
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
   # build kim-api
-
-  print("Configuring example Models")
-  cmd = 'cd "%s/%s"; make add-examples' % (thisdir,version)
-  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-  if verboseflag: print (txt.decode("UTF-8"))
-
-  if everythingflag:
-    print("Configuring all OpenKIM models, this will take a while ...")
-    cmd = 'cd "%s/%s"; make add-OpenKIM' % (thisdir,version)
-    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-    if verboseflag: print(txt.decode("UTF-8"))
-
   print("Building kim-api ...")
   cmd = 'cd "%s/%s"; make' % (thisdir,version)
   txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
@@ -235,18 +223,28 @@ if buildflag:
   txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
   if verboseflag: print(txt.decode("UTF-8"))
 
-  cmd = 'cd "%s/%s"; make install-set-default-to-v1' %(thisdir,version)
-  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-  if verboseflag: print(txt.decode("UTF-8"))
-
   # remove source files
 
+  print("Building and installing example Models")
+  cmd = 'cd "%s/%s/examples"; make model-drivers-all-system' % (thisdir,version)
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print (txt.decode("UTF-8"))
+  cmd = 'cd "%s/%s/examples"; make models-all-system' % (thisdir,version)
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print (txt.decode("UTF-8"))
+
   print("Removing kim-api source and build files ...")
-  cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' % (thisdir,version,version)
+  cmd = 'cd "%s"; rm -rf %s; rm -rf %s.txz' % (thisdir,version,version)
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
-# add a single model (and possibly its driver) to existing KIM installation
+  # add all OpenKIM models, if desired (tool path is quoted so install dirs containing spaces work)
+  if everythingflag:
+    print("Adding all OpenKIM models, this will take a while ...")
+    cmd = '"%s/bin/kim-api-v1-collections-management" install system OpenKIM' % (kimdir)
+    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+    if verboseflag: print(txt.decode("UTF-8"))
 
+# add single OpenKIM model
 if addflag:
 
   if not os.path.isdir(kimdir):
@@ -254,64 +252,6 @@ if addflag:
     error()
 
   # download single model
-
-  print("Downloading tarball for %s..." % addmodelname)
-  url = "https://openkim.org/download/%s.tgz" % addmodelname
-  geturl(url,"%s/%s.tgz" % (thisdir,addmodelname))
-
-  print("Unpacking item tarball ...")
-  cmd = 'cd "%s"; tar -xzvf %s.tgz' % (thisdir,addmodelname)
-  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-
-  print("Building item ...")
-  cmd = 'cd "%s/%s"; make; make install' %(thisdir,addmodelname)
-  try:
-    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-  except subprocess.CalledProcessError as e:
-
-    # Error: but first, check to see if it needs a driver
-    firstRunOutput = e.output.decode("UTF-8")
-
-    cmd = 'cd "%s/%s"; make kim-item-type' % (thisdir,addmodelname)
-    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-    txt = txt.decode("UTF-8")
-    if txt == "ParameterizedModel":
-
-      # Get and install driver
-
-      cmd = 'cd "%s/%s"; make model-driver-name' % (thisdir,addmodelname)
-      txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-      adddrivername = txt.decode("UTF-8").strip()
-      print("First installing model driver: %s..." % adddrivername)
-      cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,adddrivername)
-      try:
-        txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-      except subprocess.CalledProcessError as e:
-        print(e.output)
-        sys.exit()
-
-      if verboseflag: print(txt.decode("UTF-8"))
-
-      # now install the model that needed the driver
-
-      print("Now installing model : %s" % addmodelname)
-      cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,addmodelname)
-      try:
-        txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-      except subprocess.CalledProcessError as e:
-        print(e.output)
-        sys.exit()
-      print(txt.decode("UTF-8"))
-      sys.exit()
-    else:
-      print(firstRunOutput)
-      print("Error, unable to build and install OpenKIM item: %s" \
-            % addmodelname)
-      sys.exit()
-
-  # success the first time
-
-  if verboseflag: print(txt.decode("UTF-8"))
-  print("Removing kim item source and build files ...")
-  cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' %(thisdir,addmodelname,addmodelname)
-  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  cmd = '"%s/bin/kim-api-v1-collections-management" install system "%s"' % (kimdir, addmodelname)  # quoted: path or name may contain spaces
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print(txt.decode("UTF-8"))
diff --git a/lib/kim/Makefile.lammps b/lib/kim/Makefile.lammps
index b66d7005a498f119341ba24cef5e24cfdda99630..d73891d1e20578b890946c69f091f45c78131b4f 100644
--- a/lib/kim/Makefile.lammps
+++ b/lib/kim/Makefile.lammps
@@ -18,10 +18,10 @@
 
 include ../../lib/kim/Makefile.KIM_DIR
 
-ifeq ($(wildcard $(KIM_INSTALL_DIR)/bin/kim-api-build-config),)
-  KIM_CONFIG_HELPER = kim-api-build-config
+ifeq ($(wildcard $(KIM_INSTALL_DIR)/bin/kim-api-v1-build-config),)
+  KIM_CONFIG_HELPER = kim-api-v1-build-config
 else
-  KIM_CONFIG_HELPER = $(KIM_INSTALL_DIR)/bin/kim-api-build-config
+  KIM_CONFIG_HELPER = $(KIM_INSTALL_DIR)/bin/kim-api-v1-build-config
 endif
 ifeq ($(shell $(KIM_CONFIG_HELPER) --version 2> /dev/null),)
   $(error $(KIM_CONFIG_HELPER) utility is not available.  Something is wrong with your KIM API package setup)
diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md
index 43d3f17d634eacb38cf1f017afb9fea5706ac7cb..d414056187771f977114194c8e3e0091de47de75 100644
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@@ -1,5 +1,24 @@
 # Change Log
 
+## [2.04.04](https://github.com/kokkos/kokkos/tree/2.04.04) (2017-09-11)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.00...2.04.04)
+
+**Implemented enhancements:**
+
+- OpenMP partition: set number of threads on nested level [\#1082](https://github.com/kokkos/kokkos/issues/1082)
+- Add StaticCrsGraph row\(\) method [\#1071](https://github.com/kokkos/kokkos/issues/1071)
+- Enhance Kokkos complex operator overloading [\#1052](https://github.com/kokkos/kokkos/issues/1052)
+- Tell Trilinos packages about host+device lambda [\#1019](https://github.com/kokkos/kokkos/issues/1019)
+- Function markup for defaulted class members [\#952](https://github.com/kokkos/kokkos/issues/952)
+- Add deterministic random number generator [\#857](https://github.com/kokkos/kokkos/issues/857)
+
+**Fixed bugs:**
+
+- Fix reduction\_identity\<T\>::max for floating point numbers [\#1048](https://github.com/kokkos/kokkos/issues/1048)
+- Fix MD iteration policy ignores lower bound on GPUs [\#1041](https://github.com/kokkos/kokkos/issues/1041)
+- (Experimental) HBWSpace  Linking issues in KokkosKernels [\#1094](https://github.com/kokkos/kokkos/issues/1094)
+- (Experimental) ROCm:  algorithms/unit\_tests test\_sort failing with segfault [\#1070](https://github.com/kokkos/kokkos/issues/1070)
+
 ## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00)
 
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
deleted file mode 100644
index 0b0036da374b7bf1d0cccb5f515124012b689888..0000000000000000000000000000000000000000
--- a/lib/kokkos/Makefile.kokkos
+++ /dev/null
@@ -1,954 +0,0 @@
-# Default settings common options.
-
-#LAMMPS specific settings:
-KOKKOS_PATH=../../lib/kokkos
-CXXFLAGS=$(CCFLAGS)
-
-# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
-KOKKOS_DEVICES ?= "Cuda, OpenMP"
-#KOKKOS_DEVICES ?= "Pthreads"
-# Options: 
-KOKKOS_ARCH ?= "Pascal61"
-# NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
-# ARM:      ARMv80,ARMv81,ARMv8-ThunderX
-# IBM:      BGQ,Power7,Power8,Power9
-# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
-# AMD-CPUS: AMDAVX,Ryzen,Epyc
-# Options: yes,no
-KOKKOS_DEBUG ?= "no"
-# Options: hwloc,librt,experimental_memkind
-KOKKOS_USE_TPLS ?= ""
-# Options: c++11,c++1z
-KOKKOS_CXX_STANDARD ?= "c++11"
-# Options: aggressive_vectorization,disable_profiling
-KOKKOS_OPTIONS ?= ""
-
-# Default settings specific options.
-# Options: force_uvm,use_ldg,rdc,enable_lambda
-KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
-
-# Check for general settings.
-KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
-KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
-KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
-
-# Check for external libraries.
-KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
-KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
-KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
-
-# Check for advanced settings.
-KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l))
-KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
-KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
-KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l))
-KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l))
-KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
-KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
-KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
-KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
-
-
-# Check for Kokkos Host Execution Spaces one of which must be on.
-KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
-KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
-KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
-KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
-  ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
-    ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
-      KOKKOS_INTERNAL_USE_SERIAL := 1
-    endif
-  endif
-endif
-
-# Check for other Execution Spaces.
-KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
-KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
-KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
-  CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
-  KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
-endif
-
-# Check OS.
-KOKKOS_OS                      := $(strip $(shell uname -s))
-KOKKOS_INTERNAL_OS_CYGWIN      := $(strip $(shell uname -s | grep CYGWIN | wc -l))
-KOKKOS_INTERNAL_OS_LINUX       := $(strip $(shell uname -s | grep Linux  | wc -l))
-KOKKOS_INTERNAL_OS_DARWIN      := $(strip $(shell uname -s | grep Darwin | wc -l))
-
-# Check compiler.
-KOKKOS_INTERNAL_COMPILER_INTEL       := $(strip $(shell $(CXX) --version       2>&1 | grep "Intel Corporation" | wc -l))
-KOKKOS_INTERNAL_COMPILER_PGI         := $(strip $(shell $(CXX) --version       2>&1 | grep PGI                 | wc -l))
-KOKKOS_INTERNAL_COMPILER_XL          := $(strip $(shell $(CXX) -qversion       2>&1 | grep XL                  | wc -l))
-KOKKOS_INTERNAL_COMPILER_CRAY        := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-"               | wc -l))
-KOKKOS_INTERNAL_COMPILER_NVCC        := $(strip $(shell $(CXX) --version       2>&1 | grep nvcc                | wc -l))
-ifneq ($(OMPI_CXX),)
-  KOKKOS_INTERNAL_COMPILER_NVCC      := $(strip $(shell $(OMPI_CXX) --version       2>&1 | grep nvcc                | wc -l))
-endif
-ifneq ($(MPICH_CXX),)
-  KOKKOS_INTERNAL_COMPILER_NVCC      := $(strip $(shell $(MPICH_CXX) --version       2>&1 | grep nvcc                | wc -l))
-endif
-KOKKOS_INTERNAL_COMPILER_CLANG       := $(strip $(shell $(CXX) --version       2>&1 | grep clang               | wc -l))
-KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version       2>&1 | grep "apple-darwin"      | wc -l))
-KOKKOS_INTERNAL_COMPILER_HCC         := $(strip $(shell $(CXX) --version       2>&1 | grep HCC                 | wc -l))
-
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
-  KOKKOS_INTERNAL_COMPILER_CLANG = 1
-endif
-ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
-  KOKKOS_INTERNAL_COMPILER_XL = 1
-endif
-
-# Apple Clang passes both clang and apple clang tests, so turn off clang.
-ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
-  KOKKOS_INTERNAL_COMPILER_CLANG = 0
-endif
-# AMD HCC passes both clang and hcc test so turn off clang
-ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
-  KOKKOS_INTENAL_COMPILER_CLANG = 0
-endif
-
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
-
-  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
-      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
-    endif
-
-    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
-  endif
-endif
-
-# Set compiler warnings flags.
-ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-    # TODO check if PGI accepts GNU style warnings
-    KOKKOS_INTERNAL_COMPILER_WARNINGS =
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
-        KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
-      else
-        ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-          KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
-        else
-          ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-            # TODO check if cray accepts GNU style warnings
-            KOKKOS_INTERNAL_COMPILER_WARNINGS =
-          else
-            #gcc
-            KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
-          endif
-        endif
-      endif
-    endif
-  endif
-else
-  KOKKOS_INTERNAL_COMPILER_WARNINGS =
-endif
-
-# Set OpenMP flags.
-ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
-else
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
-      KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-        KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
-      else
-        ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-          # OpenMP is turned on by default in Cray compiler environment.
-          KOKKOS_INTERNAL_OPENMP_FLAG :=
-        else
-          KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
-        endif
-      endif
-    endif
-  endif
-endif
-ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-  KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
-else
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
-  endif
-endif
-
-# Set C++11 flags.
-ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
-else
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-     KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-      KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
-        KOKKOS_INTERNAL_CXX11_FLAG := 
-      else
-        KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
-        KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
-      endif
-    endif
-  endif
-endif
-
-# Check for Kokkos Architecture settings.
-
-# Intel based.
-KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
-
-# NVIDIA based.
-NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
-KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
-                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
-  KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
-  KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
-  KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
-                                                        + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
-  ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
-      CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
-      KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH)
-    endif
-  endif
-endif
-# ARM based.
-KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
-
-# IBM based.
-KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
-
-# AMD based.
-KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
-KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
-
-# Any AVX?
-KOKKOS_INTERNAL_USE_ARCH_SSE42      := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX        := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
-
-# Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64    := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
-KOKKOS_INTERNAL_USE_ISA_KNC       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
-KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
-KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
-
-# Decide whether we can support transactional memory
-KOKKOS_INTERNAL_USE_TM            := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
-
-# Incompatible flags?
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
-KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
-  $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
-  $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
-endif
-
-# Generating the list of Flags.
-
-KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
-
-KOKKOS_CXXFLAGS =
-ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
-endif
-
-KOKKOS_LIBS = -lkokkos -ldl
-KOKKOS_LDFLAGS = -L$(shell pwd)
-KOKKOS_SRC =
-KOKKOS_HEADERS =
-
-# Generating the KokkosCore_config.h file.
-
-tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
-tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
-tmp := $(shell date >> KokkosCore_config.tmp)
-tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)
-
-tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp)
-tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp)
-tmp := $(shell echo '\#else' >> KokkosCore_config.tmp)
-tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp)
-tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp)
-
-tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
-  tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-  tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-  tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
-  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
-  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
-  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
-  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
-  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
-endif
-
-tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
-ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
-  tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-    KOKKOS_CXXFLAGS += -lineinfo
-  endif
-
-  KOKKOS_CXXFLAGS += -g
-  KOKKOS_LDFLAGS += -g -ldl
-  tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
-  ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
-    tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp )
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
-  tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
-  KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
-  KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
-  KOKKOS_LIBS += -lhwloc
-  tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
-  tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
-  KOKKOS_LIBS += -lrt
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
-  KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
-  KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
-  KOKKOS_LIBS += -lmemkind
-  tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
-endif
-
-ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
-  tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp )
-endif
-
-tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
-
-ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
-  tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
-endif
-
-tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
-    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
-    endif
-  endif
-
-  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
-    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
-  endif
-
-  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
-    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
-    KOKKOS_CXXFLAGS += --relocatable-device-code=true
-    KOKKOS_LDFLAGS += --relocatable-device-code=true
-  endif
-
-  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
-        tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
-        KOKKOS_CXXFLAGS += -expt-extended-lambda
-      else
-        $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
-      endif
-    endif
-
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-      tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
-    endif
-  endif
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp )
-  endif
-endif
-
-# Add Architecture flags.
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-    KOKKOS_CXXFLAGS +=
-    KOKKOS_LDFLAGS +=
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-      KOKKOS_CXXFLAGS +=
-      KOKKOS_LDFLAGS +=
-    else
-      KOKKOS_CXXFLAGS += -march=armv8-a
-      KOKKOS_LDFLAGS += -march=armv8-a
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-    KOKKOS_CXXFLAGS +=
-    KOKKOS_LDFLAGS +=
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-      KOKKOS_CXXFLAGS +=
-      KOKKOS_LDFLAGS +=
-    else
-      KOKKOS_CXXFLAGS += -march=armv8.1-a
-      KOKKOS_LDFLAGS += -march=armv8.1-a
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-    KOKKOS_CXXFLAGS +=
-    KOKKOS_LDFLAGS +=
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-      KOKKOS_CXXFLAGS +=
-      KOKKOS_LDFLAGS +=
-    else
-      KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
-      KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -xSSE4.2
-    KOKKOS_LDFLAGS  += -xSSE4.2
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-        KOKKOS_CXXFLAGS += -tp=nehalem
-        KOKKOS_LDFLAGS  += -tp=nehalem
-      else
-        # Assume that this is a really a GNU compiler.
-        KOKKOS_CXXFLAGS += -msse4.2
-        KOKKOS_LDFLAGS  += -msse4.2
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -mavx
-    KOKKOS_LDFLAGS  += -mavx
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-        KOKKOS_CXXFLAGS += -tp=sandybridge
-        KOKKOS_LDFLAGS  += -tp=sandybridge
-      else
-        # Assume that this is a really a GNU compiler.
-        KOKKOS_CXXFLAGS += -mavx
-        KOKKOS_LDFLAGS  += -mavx
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-
-  else
-    # Assume that this is a really a GNU compiler or it could be XL on P8.
-    KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
-    KOKKOS_LDFLAGS  += -mcpu=power7 -mtune=power7
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-
-  else
-    # Assume that this is a really a GNU compiler or it could be XL on P8.
-    KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
-    KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-
-  else
-    # Assume that this is a really a GNU compiler or it could be XL on P9.
-    KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
-    KOKKOS_LDFLAGS  += -mcpu=power9 -mtune=power9
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -xCORE-AVX2
-    KOKKOS_LDFLAGS  += -xCORE-AVX2
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-        KOKKOS_CXXFLAGS += -tp=haswell
-        KOKKOS_LDFLAGS  += -tp=haswell
-      else
-        # Assume that this is a really a GNU compiler.
-        KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
-        KOKKOS_LDFLAGS  += -march=core-avx2 -mtune=core-avx2
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -xCORE-AVX2
-    KOKKOS_LDFLAGS  += -xCORE-AVX2
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-        KOKKOS_CXXFLAGS += -tp=haswell
-        KOKKOS_LDFLAGS  += -tp=haswell
-      else
-        # Assume that this is a really a GNU compiler.
-        KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
-        KOKKOS_LDFLAGS  += -march=core-avx2 -mtune=core-avx2 -mrtm
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -xMIC-AVX512
-    KOKKOS_LDFLAGS  += -xMIC-AVX512
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-
-      else
-        # Asssume that this is really a GNU compiler.
-        KOKKOS_CXXFLAGS += -march=knl -mtune=knl
-        KOKKOS_LDFLAGS  += -march=knl -mtune=knl
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
-    KOKKOS_CXXFLAGS += -xCORE-AVX512
-    KOKKOS_LDFLAGS  += -xCORE-AVX512
-  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-
-    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
-
-      else
-        # Nothing here yet.
-        KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
-        KOKKOS_LDFLAGS  += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
-      endif
-    endif
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += -mmic
-  KOKKOS_LDFLAGS += -mmic
-endif
-
-# Figure out the architecture flag for Cuda.
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
-  endif
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
-    KOKKOS_CXXFLAGS += -x cuda
-  endif
-
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
-  endif
-
-  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
-    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
-
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-      KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
-    endif
-  endif
-endif
-
-# Figure out the architecture flag for ROCm.
-ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
-  # Lets start with adding architecture defines
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900 
-  endif
-  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
-    tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
-    KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901 
-  endif
- 
-  
-  KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
-  ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)
-
-  KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) 
-  KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm 
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
-
-  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
-endif
-
-KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
-
-ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
-  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
-else
-  KOKKOS_INTERNAL_NEW_CONFIG := 1
-endif
-
-ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
-  tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
-endif
-
-KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
-KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
-KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
-KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
-KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
-
-KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
-KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
-
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
-  KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
-  KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
-  KOKKOS_LIBS += -lcudart -lcuda
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
-  endif
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
-  KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
-  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-    KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
-  else
-    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
-  endif
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
-
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-    KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
-  else
-    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
-  endif
-
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
-  KOKKOS_LIBS += -lpthread
-endif
-
-ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
-  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
-  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
-  KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
-  KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
-  KOKKOS_LIBS += -lqthread
-endif
-
-# Explicitly set the GCC Toolchain for Clang.
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-  KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
-  KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
-  KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
-  KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
-endif
-
-# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
-ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
-  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
-endif
-
-# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning.
-ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
-  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC))
-endif
-
-# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
-# device to avoid a link warning.
-ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
-  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
-  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
-endif
-
-# With Cygwin functions such as fdopen and fileno are not defined
-# when strict ansi is enabled. strict ansi gets enabled with --std=c++11
-# though. So we hard undefine it here. Not sure if that has any bad side effects
-# This is needed for gtest actually, not for Kokkos itself!
-ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
-  KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
-endif
-
-# Setting up dependencies.
-
-KokkosCore_config.h:
-
-KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)
-
-KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
-KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
-
-include $(KOKKOS_PATH)/Makefile.targets
-
-kokkos-clean:
-	rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
-
-libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
-	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
-	ranlib libkokkos.a
-
-KOKKOS_LINK_DEPENDS=libkokkos.a
diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
index 9082e47052937dd63a483099ae43a7c65e86bff3..3db9a145d7c9dc7ab1da3a1e6f034a2a0e7d929b 100644
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@@ -1265,6 +1265,243 @@ void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift102
 }
 
 
+#endif
+
+#if defined(KOKKOS_ENABLE_ROCM) 
+
+  template<>
+  class Random_XorShift1024<Kokkos::Experimental::ROCm> {
+  private:
+    int p_;
+    const int state_idx_;
+    uint64_t* state_;
+    const int stride_;
+    friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>;
+  public:
+
+    typedef Kokkos::Experimental::ROCm device_type;
+    typedef Random_XorShift1024_Pool<device_type> pool_type;
+
+    enum {MAX_URAND = 0xffffffffU};
+    enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
+    enum {MAX_RAND = static_cast<int>(0xffffffffU/2)};
+    enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
+
+    KOKKOS_INLINE_FUNCTION
+    Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0):
+      p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand() {
+      uint64_t state_0 = state_[ p_ * stride_ ];
+      uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
+      state_1 ^= state_1 << 31;
+      state_1 ^= state_1 >> 11;
+      state_0 ^= state_0 >> 30;
+      uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL;
+      tmp = tmp>>16;
+      return static_cast<uint32_t>(tmp&MAX_URAND);
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64() {
+      uint64_t state_0 = state_[ p_ * stride_ ];
+      uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
+      state_1 ^= state_1 << 31;
+      state_1 ^= state_1 >> 11;
+      state_0 ^= state_0 >> 30;
+      return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1;
+    }
+
+    // Uniform draw reduced modulo range; values at or above max_val are rejected to avoid modulo bias.
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand(const uint32_t& range) {
+      const uint32_t max_val = (MAX_URAND/range)*range;
+      uint32_t tmp = urand();
+      while(tmp>=max_val) tmp = urand();  // store the redraw, otherwise the loop never terminates
+      return tmp%range;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    uint32_t urand(const uint32_t& start, const uint32_t& end ) {
+      return urand(end-start)+start;
+    }
+
+    // 64-bit uniform draw reduced modulo range; values at or above max_val are rejected to avoid modulo bias.
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64(const uint64_t& range) {
+      const uint64_t max_val = (MAX_URAND64/range)*range;
+      uint64_t tmp = urand64();
+      while(tmp>=max_val) tmp = urand64();  // store the redraw, otherwise the loop never terminates
+      return tmp%range;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
+      return urand64(end-start)+start;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    int rand() {
+      return static_cast<int>(urand()/2);
+    }
+
+    // Signed draw reduced modulo range; values at or above max_val are rejected to avoid modulo bias.
+    KOKKOS_INLINE_FUNCTION
+    int rand(const int& range) {
+      const int max_val = (MAX_RAND/range)*range;
+      int tmp = rand();
+      while(tmp>=max_val) tmp = rand();  // store the redraw, otherwise the loop never terminates
+      return tmp%range;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    int rand(const int& start, const int& end ) {
+      return rand(end-start)+start;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64() {
+      return static_cast<int64_t>(urand64()/2);
+    }
+
+    // Signed 64-bit draw reduced modulo range; values at or above max_val are rejected to avoid modulo bias.
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64(const int64_t& range) {
+      const int64_t max_val = (MAX_RAND64/range)*range;
+      int64_t tmp = rand64();
+      while(tmp>=max_val) tmp = rand64();  // store the redraw, otherwise the loop never terminates
+      return tmp%range;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    int64_t rand64(const int64_t& start, const int64_t& end ) {
+      return rand64(end-start)+start;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    float frand() {
+      return 1.0f * urand64()/MAX_URAND64;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    float frand(const float& range) {
+      return range * urand64()/MAX_URAND64;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    float frand(const float& start, const float& end ) {
+      return frand(end-start)+start;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    double drand() {
+      return 1.0 * urand64()/MAX_URAND64;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    double drand(const double& range) {
+      return range * urand64()/MAX_URAND64;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    double drand(const double& start, const double& end ) {
+      return drand(end-start)+start;  // stay in double: frand() would narrow the span and the result to float
+    }
+
+    //Marsaglia polar method for drawing a standard normal distributed random number
+    KOKKOS_INLINE_FUNCTION
+    double normal() {
+      double S = 2.0;
+      double U;
+      while(S>=1.0) {
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
+        S = U*U+V*V;
+      }
+      return U*std::sqrt(-2.0*log(S)/S);
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    double normal(const double& mean, const double& std_dev=1.0) {
+      return mean + normal()*std_dev;
+    }
+  };
+
+template<>
+inline
+Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
+  num_states_ = 0;
+  init(seed,4*32768);
+}
+
+template<>
+KOKKOS_INLINE_FUNCTION
+Random_XorShift64<Kokkos::Experimental::ROCm> Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
+#ifdef __HCC_ACCELERATOR__
+  const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
+  int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
+           blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
+  while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
+      i+=blockDim_x*blockDim_y*blockDim_z;
+      if(i>=num_states_) {i = i_offset;}
+  }
+
+  return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i),i);
+#else
+  return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0),0);
+#endif
+}
+
+template<>
+KOKKOS_INLINE_FUNCTION
+void Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift64<Kokkos::Experimental::ROCm> &state) const {
+#ifdef __HCC_ACCELERATOR__
+  state_(state.state_idx_) = state.state_;
+  locks_(state.state_idx_) = 0;
+  return;
+#endif
+}
+
+
+template<>
+inline
+Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
+  num_states_ = 0;
+  init(seed,4*32768);
+}
+
+template<>
+KOKKOS_INLINE_FUNCTION
+Random_XorShift1024<Kokkos::Experimental::ROCm> Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
+#ifdef __HCC_ACCELERATOR__
+  const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
+  int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
+           blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
+  while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
+      i+=blockDim_x*blockDim_y*blockDim_z;
+      if(i>=num_states_) {i = i_offset;}
+  }
+
+  return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
+#else
+  return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
+#endif
+}
+
+template<>
+KOKKOS_INLINE_FUNCTION
+void Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift1024<Kokkos::Experimental::ROCm> &state) const {
+#ifdef __HCC_ACCELERATOR__
+  for(int i=0; i<16; i++)
+    state_(state.state_idx_,i) = state.state_[i];
+  locks_(state.state_idx_) = 0;
+  return;
+#endif
+}
+
+
 #endif
 
 
diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile
index b74192ef18d0c132e7b734bcb27796304744ed66..a5a10c82ee525d4b4ca9bdc2ba3ae002ce0858dd 100644
--- a/lib/kokkos/algorithms/unit_tests/Makefile
+++ b/lib/kokkos/algorithms/unit_tests/Makefile
@@ -30,6 +30,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	TEST_TARGETS += test-cuda
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
+	OBJ_ROCM = TestROCm.o UnitTestMain.o gtest-all.o
+	TARGETS += KokkosAlgorithms_UnitTest_ROCm
+	TEST_TARGETS += test-rocm
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
 	OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
 	TARGETS += KokkosAlgorithms_UnitTest_Threads
@@ -51,6 +57,9 @@ endif
 KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda
 
+KokkosAlgorithms_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_ROCm
+
 KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads
 
@@ -63,6 +72,9 @@ KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
 test-cuda: KokkosAlgorithms_UnitTest_Cuda
 	./KokkosAlgorithms_UnitTest_Cuda
 
+test-rocm: KokkosAlgorithms_UnitTest_ROCm
+	./KokkosAlgorithms_UnitTest_ROCm
+
 test-threads: KokkosAlgorithms_UnitTest_Threads
 	./KokkosAlgorithms_UnitTest_Threads
 
diff --git a/lib/kokkos/algorithms/unit_tests/TestROCm.cpp b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..720b377ed2fb29a74d241ea6c42b46e3b15b5541
--- /dev/null
+++ b/lib/kokkos/algorithms/unit_tests/TestROCm.cpp
@@ -0,0 +1,112 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_ROCM
+
+#include <cstdint>
+#include <iostream>
+#include <iomanip>
+
+#include <gtest/gtest.h>
+
+#include <Kokkos_Core.hpp>
+
+#include <TestRandom.hpp>
+#include <TestSort.hpp>
+
+namespace Test {
+
+class rocm : public ::testing::Test {
+protected:
+  static void SetUpTestCase()
+  {
+    std::cout << std::setprecision(5) << std::scientific;
+    Kokkos::HostSpace::execution_space::initialize();
+    Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) );
+  }
+  static void TearDownTestCase()
+  {
+    Kokkos::Experimental::ROCm::finalize();
+    Kokkos::HostSpace::execution_space::finalize();
+  }
+};
+
+void rocm_test_random_xorshift64( int num_draws  )
+{
+  Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::ROCm> >(num_draws);
+}
+
+void rocm_test_random_xorshift1024( int num_draws  )
+{
+  Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::ROCm> >(num_draws);
+}
+
+
+#define ROCM_RANDOM_XORSHIFT64( num_draws )  \
+  TEST_F( rocm, Random_XorShift64 ) {        \
+  rocm_test_random_xorshift64(num_draws);    \
+  }
+
+#define ROCM_RANDOM_XORSHIFT1024( num_draws )  \
+  TEST_F( rocm, Random_XorShift1024 ) {        \
+  rocm_test_random_xorshift1024(num_draws);    \
+  }
+
+#define ROCM_SORT_UNSIGNED( size )                                    \
+  TEST_F( rocm, SortUnsigned ) {                                      \
+      Impl::test_sort< Kokkos::Experimental::ROCm, unsigned >(size);  \
+  }
+
+ROCM_RANDOM_XORSHIFT64(  132141141 )
+ROCM_RANDOM_XORSHIFT1024( 52428813 )
+ROCM_SORT_UNSIGNED(171)
+
+#undef ROCM_RANDOM_XORSHIFT64
+#undef ROCM_RANDOM_XORSHIFT1024
+#undef ROCM_SORT_UNSIGNED
+}
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTROCM_PREVENT_LINK_ERROR() {}
+#endif  /* #ifdef KOKKOS_ENABLE_ROCM */
+
diff --git a/lib/kokkos/bin/hpcbind b/lib/kokkos/bin/hpcbind
index ca34648780174d626bc2b04dbbbb282eda3f9dff..b88b334f8bdf22365d5c8159ffebb47de9ecb2a7 100755
--- a/lib/kokkos/bin/hpcbind
+++ b/lib/kokkos/bin/hpcbind
@@ -27,7 +27,7 @@ fi
 HPCBIND_HWLOC_PARENT_CPUSET=""
 if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
   MY_PID="$BASHPID"
-  HPCBIND_HWLOC_PARENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2)
+  HPCBIND_HWLOC_PARENT_CPUSET="$(hwloc-ps -a --cpuset | awk -v pid="${MY_PID}" '$1 == pid { print $2 }')"  # exact match on the PID column; a substring grep also hits longer PIDs
 fi
 
 ################################################################################
@@ -58,23 +58,34 @@ declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0))
 ################################################################################
 HPCBIND_QUEUE_NAME=""
 declare -i HPCBIND_QUEUE_INDEX=0
-declare -i HPCBIND_QUEUE_GPU_MAPPING=0
-
-if [[ ! -z "${SLURM_LOCAL_ID}" ]]; then
-  HPCBIND_QUEUE_GPU_MAPPING=1
-  HPCBIND_QUEUE_NAME="sbatch"
+declare -i HPCBIND_QUEUE_MAPPING=0
+
+if [[ ! -z "${PMI_RANK}" ]]; then
+  HPCBIND_QUEUE_MAPPING=1
+  HPCBIND_QUEUE_NAME="mpich"
+  HPCBIND_QUEUE_INDEX=${PMI_RANK}
+elif [[ ! -z "${OMPI_COMM_WORLD_RANK}" ]]; then
+  HPCBIND_QUEUE_MAPPING=1
+  HPCBIND_QUEUE_NAME="openmpi"
+  HPCBIND_QUEUE_INDEX=${OMPI_COMM_WORLD_RANK}
+elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then
+  HPCBIND_QUEUE_MAPPING=1
+  HPCBIND_QUEUE_NAME="mvapich2"
+  HPCBIND_QUEUE_INDEX=${MV2_COMM_WORLD_RANK}
+elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then
+  HPCBIND_QUEUE_MAPPING=1
+  HPCBIND_QUEUE_NAME="slurm"
   HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID}
 elif [[ ! -z "${LBS_JOBINDEX}" ]]; then
-  HPCBIND_QUEUE_GPU_MAPPING=1
+  HPCBIND_QUEUE_MAPPING=1
   HPCBIND_QUEUE_NAME="bsub"
   HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX}
 elif [[ ! -z "${ALPS_APP_PE}" ]]; then
-  HPCBIND_QUEUE_GPU_MAPPING=1
+  HPCBIND_QUEUE_MAPPING=1
   HPCBIND_QUEUE_NAME="aprun"
   HPCBIND_QUEUE_INDEX=${ALPS_APP_PE}
 fi
 
-
 ################################################################################
 # Show help
 ################################################################################
@@ -91,13 +102,14 @@ function show_help {
   echo "  --proc-bind=<LOC>     Set the initial process mask for the script"
   echo "                        LOC can be any valid location argument for"
   echo "                        hwloc-calc  Default: all"
+  echo "  --whole-system        ${cmd} will ignore its parent process binding"
   echo "  --distribute=N        Distribute the current cpuset into N partitions"
   echo "  --distribute-partition=I"
   echo "                        Use the i'th partition (zero based)"
   echo "  --visible-gpus=<L>    Comma separated list of gpu ids"
   echo "                        Default: CUDA_VISIBLE_DEVICES or all gpus in"
   echo "                        sequential order"
-  echo "  --gpu-ignore-queue    Ignore queue job id when choosing visible GPU"
+  echo "  --ignore-queue        Ignore queue job id when choosing visible GPU and partition"
   echo "  --no-gpu-mapping      Do not set CUDA_VISIBLE_DEVICES"
   echo "  --openmp=M.m          Set env variables for the given OpenMP version"
   echo "                        Default: 4.0"
@@ -110,22 +122,30 @@ function show_help {
   echo "  --force-openmp-proc-bind=<OP>"
   echo "                        Override logic for selecting OMP_PROC_BIND"
   echo "  --no-openmp-nested    Set OMP_NESTED to false"
-  echo "  --show-bindings       Show the bindings"
-  echo "  --lstopo              Show bindings in lstopo without executing a command"
-  echo "  -v|--verbose          Show options and relevant environment variables"
+  echo "  --output-prefix=<P>   Save the output to files of the form"
+  echo "                        P-N.hpc.log, P-N.out and P-N.err where P is the prefix"  # log name must match HPCBIND_LOG (<prefix>-<index>.hpc.log)
+  echo "                        and N is the queue index or mpi rank (no spaces)"
+  echo "  --output-mode=<Op>    How console output should be handled."
+  echo "                        Options are all, rank0, and none.  Default: rank0"
+  echo "  --lstopo              Show bindings in lstopo"
+  echo "  -v|--verbose          Print bindings and relevant environment variables"
   echo "  -h|--help             Show this message"
   echo ""
   echo "Sample Usage:"
   echo "  Split the current process cpuset into 4 and use the 3rd partition"
   echo "    ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..."
-  echo "  Bing the process to all even cores"
+  echo "  Launch 16 jobs over 4 nodes with 4 jobs per node using only the even pus"
+  echo "  and save the output to rank specific files"
+  echo "    mpiexec -N 16 -npernode 4 ${cmd} --whole-system --proc-bind=pu:even \\"
+  echo "      --distribute=4 -v --output-prefix=output  -- command ..."
+  echo "  Bind the process to all even cores"
   echo "    ${cmd} --proc-bind=core:even -v -- command ..."
-  echo "  Bind to the first 64 cores and split the current process cpuset into 4"
-  echo "    ${cmd} --proc-bind=core:0-63 --distribute=4 --distribute-partition=0 -- command ..."
-  echo "  skip GPU 0 when mapping visible devices"
+  echo "  Bind to the even cores of socket 0 and the odd cores of socket 1"
+  echo "    ${cmd} --proc-bind='socket:0.core:even socket:1.core:odd' -v -- command ..."
+  echo "  Skip GPU 0 when mapping visible devices"
   echo "    ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..."
   echo "  Display the current bindings"
-  echo "    ${cmd} --proc-bind=numa:0 --show-bindings -- command"
+  echo "    ${cmd} --proc-bind=numa:0 -v -- command"  # -v is required: without it the bindings are written only to the log file
   echo "  Display the current bindings using lstopo"
   echo "    ${cmd} --proc-bind=numa:0.core:odd --lstopo"
   echo ""
@@ -144,7 +164,7 @@ fi
 declare -a UNKNOWN_ARGS=()
 declare -i HPCBIND_ENABLE_HWLOC_BIND=${HPCBIND_HAS_HWLOC}
 declare -i HPCBIND_DISTRIBUTE=1
-declare -i HPCBIND_PARTITION=0
+declare -i HPCBIND_PARTITION=-1
 HPCBIND_PROC_BIND="all"
 HPCBIND_OPENMP_VERSION=4.0
 declare -i HPCBIND_OPENMP_PERCENT=100
@@ -155,11 +175,15 @@ HPCBIND_OPENMP_FORCE_PROC_BIND=""
 HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true}
 declare -i HPCBIND_VERBOSE=0
 
-declare -i HPCBIND_SHOW_BINDINGS=0
 declare -i HPCBIND_LSTOPO=0
 
-for i in $@; do
-  case $i in
+HPCBIND_OUTPUT_PREFIX=""
+HPCBIND_OUTPUT_MODE="rank0"
+
+declare -i HPCBIND_HAS_COMMAND=0
+
+for i in "$@"; do
+  case "$i" in
     # number of partitions to create
     --no-hwloc-bind)
       HPCBIND_ENABLE_HWLOC_BIND=0
@@ -169,6 +193,10 @@ for i in $@; do
       HPCBIND_PROC_BIND="${i#*=}"
       shift
       ;;
+    --whole-system)
+      HPCBIND_HWLOC_PARENT_CPUSET=""
+      shift
+      ;;
     --distribute=*)
       HPCBIND_DISTRIBUTE="${i#*=}"
       shift
@@ -182,8 +210,8 @@ for i in $@; do
       HPCBIND_VISIBLE_GPUS=$(echo "${i#*=}" | tr ',' ' ')
       shift
       ;;
-    --gpu-ignore-queue)
-      HPCBIND_QUEUE_GPU_MAPPING=0
+    --ignore-queue)
+      HPCBIND_QUEUE_MAPPING=0
       shift
       ;;
     --no-gpu-mapping)
@@ -218,14 +246,18 @@ for i in $@; do
       HPCBIND_OPENMP_NESTED="false"
       shift
       ;;
-    --show-bindings)
-      HPCBIND_VERBOSE=1
-      HPCBIND_SHOW_BINDINGS=1
+    --output-prefix=*)
+      HPCBIND_OUTPUT_PREFIX="${i#*=}"
+      shift
+      ;;
+    --output-mode=*)
+      HPCBIND_OUTPUT_MODE="${i#*=}"
+      #convert to lower case
+      HPCBIND_OUTPUT_MODE="${HPCBIND_OUTPUT_MODE,,}"
       shift
       ;;
     --lstopo)
       HPCBIND_VERBOSE=1
-      HPCBIND_SHOW_BINDINGS=0
       HPCBIND_LSTOPO=1
       shift
       ;;
@@ -239,6 +271,7 @@ for i in $@; do
       ;;
     # ignore remaining arguments
     --)
+      HPCBIND_HAS_COMMAND=1
       shift
       break
       ;;
@@ -250,16 +283,41 @@ for i in $@; do
   esac
 done
 
+################################################################################
+# Check output mode
+################################################################################
+declare -i HPCBIND_TEE=0
+
+if [[ "${HPCBIND_OUTPUT_MODE}" == "none" ]]; then
+  HPCBIND_TEE=0
+elif [[ "${HPCBIND_OUTPUT_MODE}" == "all" ]]; then
+  HPCBIND_TEE=1
+elif [[ ${HPCBIND_QUEUE_INDEX} -eq 0 ]]; then
+  #default to rank0 printing to screen
+  HPCBIND_TEE=1
+fi
+
+
+if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then
+  HPCBIND_LOG=/dev/null
+  HPCBIND_ERR=/dev/null
+  HPCBIND_OUT=/dev/null
+else
+  HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.hpc.log"
+  HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.err"
+  HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}-${HPCBIND_QUEUE_INDEX}.out"
+  > ${HPCBIND_LOG}
+fi
+
 
 ################################################################################
 # Check unknown arguments
 ################################################################################
 if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then
-  echo "Uknown options: ${UNKNOWN_ARGS[*]}"
+  echo "HPCBIND Unknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG})
   exit 1
 fi
 
-
 ################################################################################
 # Check that visible gpus are valid
 ################################################################################
@@ -268,22 +326,19 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
   for ((i=0; i < ${#HPCBIND_VISIBLE_GPUS[*]}; i++)); do
     if [[ ${HPCBIND_VISIBLE_GPUS[$i]} -ge ${NUM_GPUS} ||
       ${HPCBIND_VISIBLE_GPUS[$i]} -lt 0 ]]; then
-      echo "Invaild GPU ID ${HPCBIND_VISIBLE_GPUS[$i]}, setting to 0"
+      echo "HPCBIND Invalid GPU ID ${HPCBIND_VISIBLE_GPUS[$i]} (setting to 0)" > >(tee -a ${HPCBIND_LOG})
       HPCBIND_VISIBLE_GPUS[$i]=0;
     fi
   done
   NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]}
 fi
 
-
 ################################################################################
 # Check OpenMP percent
 ################################################################################
 if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then
-  echo "OpenMP percent < 1, setting to 1"
   HPCBIND_OPENMP_PERCENT=1
 elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then
-  echo "OpenMP percent > 100, setting to 100"
   HPCBIND_OPENMP_PERCENT=100
 fi
 
@@ -291,15 +346,21 @@ fi
 # Check distribute
 ################################################################################
 if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then
-  echo "Invalid input for distribute, changing distribute to 1"
   HPCBIND_DISTRIBUTE=1
 fi
 
-if [[ ${HPCBIND_PARTITION} -ge ${HPCBIND_DISTRIBUTE} ]]; then
-  echo "Invalid input for distribute-partition, changing to 0"
+################################################################################
+#choose the correct partition
+################################################################################
+if [[ ${HPCBIND_PARTITION} -lt 0 && ${HPCBIND_QUEUE_MAPPING} -eq 1 ]]; then
+  HPCBIND_PARTITION=${HPCBIND_QUEUE_INDEX}
+elif [[ ${HPCBIND_PARTITION} -lt 0 ]]; then
   HPCBIND_PARTITION=0
 fi
 
+if [[ ${HPCBIND_PARTITION} -ge ${HPCBIND_DISTRIBUTE} ]]; then
+  HPCBIND_PARTITION=$((HPCBIND_PARTITION % HPCBIND_DISTRIBUTE))
+fi
 
 ################################################################################
 # Find cpuset and num threads
@@ -309,13 +370,17 @@ declare -i HPCBIND_NUM_PUS=0
 
 if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
   if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then
-    BINDING=$(hwloc-calc ${HPCBIND_PROC_BIND})
+    BINDING=$(hwloc-calc ${HPCBIND_PROC_BIND[*]})
   else
-    BINDING=$(hwloc-calc --restrict ${HPCBIND_HWLOC_PARENT_CPUSET} ${HPCBIND_PROC_BIND})
+    BINDING=$(hwloc-calc --restrict ${HPCBIND_HWLOC_PARENT_CPUSET} ${HPCBIND_PROC_BIND[*]})
   fi
 
-  CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${HPCBIND_DISTRIBUTE}))
-  HPCBIND_HWLOC_CPUSET=${CPUSETS[${HPCBIND_PARTITION}]}
+  if [[ ${HPCBIND_DISTRIBUTE} -gt 1 ]]; then
+    CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${HPCBIND_DISTRIBUTE}))
+    HPCBIND_HWLOC_CPUSET="${CPUSETS[${HPCBIND_PARTITION}]}"
+  else
+    HPCBIND_HWLOC_CPUSET="${BINDING}"
+  fi
   HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l)
 else
   HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor)
@@ -373,13 +438,13 @@ export OMP_NESTED=${HPCBIND_OPENMP_NESTED}
 ################################################################################
 
 if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then
-  if [[ ${HPCBIND_QUEUE_GPU_MAPPING} -eq 0 ]]; then
+  if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then
     declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS))
-    export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}
+    export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
   else
     declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION))
     declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS))
-    export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}
+    export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}"
   fi
 fi
 
@@ -389,22 +454,22 @@ fi
 export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC}
 export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA}
 export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS}
-export HPCBIND_HWLOC_CPUSET=${HPCBIND_HWLOC_CPUSET}
+export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}"
 export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE}
 export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION}
 if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then
   export HPCBIND_HWLOC_PARENT_CPUSET="all"
 else
-  export HPCBIND_HWLOC_PARENT_CPUSET=${HPCBIND_HWLOC_PARENT_CPUSET}
+  export HPCBIND_HWLOC_PARENT_CPUSET="${HPCBIND_HWLOC_PARENT_CPUSET}"
 fi
-export HPCBIND_HWLOC_PROC_BIND=${HPCBIND_PROC_BIND}
+export HPCBIND_HWLOC_PROC_BIND="${HPCBIND_PROC_BIND}"
 export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING}
 export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',')
-export HPCBIND_OPENMP_VERSION=${HPCBIND_OPENMP_VERSION}
+export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}"
 if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then
   export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX}
-  export HPCBIND_QUEUE_NAME=${HPCBIND_QUEUE_NAME}
-  export HPCBIND_QUEUE_GPU_MAPPING=${HPCBIND_QUEUE_GPU_MAPPING}
+  export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}"
+  export HPCBIND_QUEUE_MAPPING=${HPCBIND_QUEUE_MAPPING}
 fi
 
 
@@ -412,43 +477,63 @@ fi
 # Print verbose
 ################################################################################
 
-if [[ ${HPCBIND_VERBOSE} -eq 1 ]]; then
-  MY_ENV=$(env | sort)
-  echo "[HPCBIND]"
-  echo "${MY_ENV}" | grep -E "^HPCBIND_"
-  echo "[CUDA]"
-  echo "${MY_ENV}" | grep -E "^CUDA_"
-  echo "[OPENMP]"
-  echo "${MY_ENV}" | grep -E "^OMP_"
-fi
+TMP_ENV=$(env | sort)
+if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then
+  echo "[HOST]" >> ${HPCBIND_LOG}
+  hostname -s >> ${HPCBIND_LOG}
+  echo "[HPCBIND]" >> ${HPCBIND_LOG}
+  echo "${TMP_ENV}" | grep -E "^HPCBIND_" >> ${HPCBIND_LOG}
+  echo "[CUDA]" >> ${HPCBIND_LOG}
+  echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG}
+  echo "[OPENMP]" >> ${HPCBIND_LOG}
+  echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG}
 
-if [[ ${HPCBIND_HAS_HWLOC} -eq 1 && ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then
-  echo "[BINDINGS]"
-  hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu
-elif [[ ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then
-  echo "Unable to show bindings, hwloc not available."
+  if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
+    echo "[BINDINGS]" >> ${HPCBIND_LOG}
+    hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu >> ${HPCBIND_LOG}
+  else
+    echo "Unable to show bindings, hwloc not available." >> ${HPCBIND_LOG}
+  fi
+else
+  echo "[HOST]" > >(tee -a ${HPCBIND_LOG})
+  hostname -s > >(tee -a ${HPCBIND_LOG})
+  echo "[HPCBIND]" > >(tee -a ${HPCBIND_LOG})
+  echo "${TMP_ENV}" | grep -E "^HPCBIND_" > >(tee -a ${HPCBIND_LOG})
+  echo "[CUDA]" > >(tee -a ${HPCBIND_LOG})
+  echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG})
+  echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG})
+  echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG})
+
+  if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
+    echo "[BINDINGS]" > >(tee -a ${HPCBIND_LOG})
+    hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu > >(tee -a ${HPCBIND_LOG})
+  else
+    echo "Unable to show bindings, hwloc not available." > >(tee -a ${HPCBIND_LOG})
+  fi
 fi
 
 ################################################################################
 # Run command
 ################################################################################
 
-if [[ ${HPCBIND_LSTOPO} -eq 0 ]]; then
-  if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
-    hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- $@
-  else
-    eval $@
-  fi
-else
-  if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then
-    if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 && ! -z ${DISPLAY} ]]; then
-      echo "[BINDINGS]"
-      hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu
-      hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- lstopo --pid 0
+# must be the last executed command so that the return value is correct
+if [[ ${HPCBIND_LSTOPO} -eq 1 && ${HPCBIND_HAS_HWLOC} -eq 1 && ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 && ! -z ${DISPLAY} ]]; then
+  hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- lstopo --pid 0
+elif [[ ${HPCBIND_HAS_COMMAND} -eq 1 ]]; then
+  # clear output files
+  > ${HPCBIND_ERR}
+  > ${HPCBIND_OUT}
+  if [[ ${HPCBIND_TEE} -eq 0 ]]; then
+    if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
+      hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
     else
-      hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET}
+      eval $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
     fi
   else
-    echo "Unable to show bindings, hwloc not available."
+    if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
+      hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
+    else
+      eval $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
+    fi
   fi
 fi
diff --git a/lib/kokkos/bin/kokkos-bind b/lib/kokkos/bin/kokkos-bind
deleted file mode 100755
index b6fe07a1bd1c55d864c66d292da3782cb23eb0a5..0000000000000000000000000000000000000000
--- a/lib/kokkos/bin/kokkos-bind
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env bash
-
-# check if hwloc commands exist
-declare -i HAS_HWLOC=0
-type hwloc-bind >/dev/null 2>&1
-HAS_HWLOC="${HAS_HWLOC} + $?"
-
-type hwloc-distrib >/dev/null 2>&1
-HAS_HWLOC="${HAS_HWLOC} + $?"
-
-type hwloc-ls >/dev/null 2>&1
-HAS_HWLOC="${HAS_HWLOC} + $?"
-
-type hwloc-calc >/dev/null 2>&1
-HAS_HWLOC="${HAS_HWLOC} + $?"
-
-type hwloc-ps >/dev/null 2>&1
-HAS_HWLOC="${HAS_HWLOC} + $?"
-
-
-#parse args
-declare -a UNKNOWN_ARGS=()
-declare -i DISTRIBUTE=1
-declare -i INDEX=0
-PROC_BIND="all"
-CURRENT_CPUSET=""
-OPENMP_VERSION=4.0
-OPENMP_PROC_BIND=True
-OPENMP_NESTED=True
-VERBOSE=False
-
-#get the current process cpuset
-if [[ ${HAS_HWLOC} -eq 0 ]]; then
-  MY_PID="$BASHPID"
-  CURRENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2)
-  echo "$CURRENT_CPUSET"
-fi
-
-function show_help {
-  local cmd=$(basename "$0")
-  echo "Usage: ${cmd} <options> -- command ..." 
-  echo "  Uses hwloc to divide the node into the given number of groups,"
-  echo "  set the appropriate OMP_NUM_THREADS and execute the command on the"
-  echo "  selected group."
-  echo ""
-  echo "  NOTE: This command assumes it has exclusive use of the node"
-  echo ""
-  echo "Options:"
-  echo "  --proc-bind=<LOC>     Set the initial process mask for the script.  "
-  echo "                        LOC can be any valid location argumnet for"
-  echo "                        hwloc-calc.  Defaults to the entire machine"
-  echo "  --distribute=N        Distribute the current proc-bind into N groups" 
-  echo "  --index=I             Use the i'th group (zero based)" 
-  echo "  --openmp=M.m          Set env variables for the given OpenMP version"
-  echo "                        (default 4.0)"
-  echo "  --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES"    
-  echo "  --no-openmp-nested    Set OMP_NESTED to false"
-  echo "  -v|--verbose" 
-  echo "  -h|--help" 
-  echo ""
-  echo "Sample Usage:"
-  echo "  ${cmd} --distribute=4 --index=2 -v -- command ..."
-  echo ""
-}
-
-if [[ "$#" -eq 0 ]]; then
-  show_help 
-  exit 0
-fi
-
-
-for i in $@; do
-  case $i in
-    # number of partitions to create
-    --proc-bind=*)
-      PROC_BIND="${i#*=}"
-      shift
-      ;;
-    --distribute=*)
-      DISTRIBUTE="${i#*=}"
-      shift
-      ;;
-    # which group to use
-    --index=*)
-      INDEX="${i#*=}"
-      shift
-      ;;
-    --openmp=*)
-      OPENMP_VERSION="${i#*=}"
-      shift
-      ;;
-    --no-openmp-proc-bind)
-      OPENMP_PROC_BIND=False
-      shift
-      ;;
-    --no-openmp-nested)
-      OPENMP_NESTED=False
-      shift
-      ;;
-    -v|--verbose)
-      VERBOSE=True
-      shift
-      ;;
-    -h|--help)
-      show_help
-      exit 0
-      ;;
-    # ignore remaining arguments
-    --)
-      shift
-      break
-      ;;
-    # unknown option
-    *)
-      UNKNOWN_ARGS+=("$i")
-      shift
-      ;;
-  esac
-done
-
-if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then
-  echo "Uknown options: ${UNKNOWN_ARGS[*]}"
-  exit 1
-fi
-
-if [[ ${DISTRIBUTE} -le 0 ]]; then
-  echo "Invalid input for distribute, changing distribute to 1"
-  DISTRIBUTE=1
-fi
-
-if [[ ${INDEX} -ge ${DISTRIBUTE} ]]; then
-  echo "Invalid input for index, changing index to 0"
-  INDEX=0
-fi
-
-if [[ ${HAS_HWLOC} -ne 0 ]]; then
-  echo "hwloc not found, no process binding will occur"
-  DISTRIBUTE=1
-  INDEX=0
-fi
-
-if [[ ${HAS_HWLOC} -eq 0 ]]; then
-
-  if [[ "${CURRENT_CPUSET}" == "" ]]; then
-    BINDING=$(hwloc-calc ${PROC_BIND})
-  else 
-    BINDING=$(hwloc-calc --restrict ${CURRENT_CPUSET} ${PROC_BIND})
-  fi
-
-  CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${DISTRIBUTE}))
-  CPUSET=${CPUSETS[${INDEX}]}
-  NUM_THREADS=$(hwloc-ls --restrict ${CPUSET} --only pu | wc -l)
-
-  if [[ "${VERBOSE}" == "True" ]]; then
-    echo "hwloc:         true"
-    echo "  proc_bind:     ${PROC_BIND}"
-    echo "  distribute:    ${DISTRIBUTE}"
-    echo "  index:         ${INDEX}"
-    echo "  parent_cpuset: ${CURRENT_CPUSET}"
-    echo "  cpuset:        ${CPUSET}"
-    echo "omp_num_threads: ${NUM_THREADS}"
-    echo "omp_proc_bind:   ${OPENMP_PROC_BIND}"
-    echo "omp_nested:      ${OPENMP_NESTED}"
-    echo "OpenMP:          ${OPENMP_VERSION}"
-  fi
-
-  # set OMP env
-  if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then
-    if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then
-      export OMP_PLACES="threads"
-      export OMP_PROC_BIND="spread"
-    else
-      export OMP_PROC_BIND="true"
-      unset OMP_PLACES
-    fi
-  else
-    unset OMP_PLACES
-    unset OMP_PROC_BIND
-  fi
-  if [[ "${OPENMP_NESTED}" == "True" ]]; then
-    export OMP_NESTED="true"
-  else
-    export OMP_NESTED="false"
-  fi
-  export OMP_NUM_THREADS="${NUM_THREADS}"
-
-  hwloc-bind ${CPUSET} -- $@
-else
-  NUM_THREADS=$(cat /proc/cpuinfo | grep -c processor)
-
-  if [[ "${VERBOSE}" == "True" ]]; then
-    echo "hwloc:           false"
-    echo "omp_num_threads: ${NUM_THREADS}"
-    echo "omp_proc_bind:   ${OPENMP_PROC_BIND}"
-    echo "omp_nested:      ${OPENMP_NESTED}"
-    echo "OpenMP:          ${OPENMP_VERSION}"
-  fi
-    
-  # set OMP env
-  if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then
-    if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then
-      export OMP_PLACES="threads"
-      export OMP_PROC_BIND="spread"
-    else
-      export OMP_PROC_BIND="true"
-      unset OMP_PLACES
-    fi
-  else
-    unset OMP_PLACES
-    unset OMP_PROC_BIND
-  fi
-  if [[ "${OPENMP_NESTED}" == "True" ]]; then
-    export OMP_NESTED="true"
-  else
-    export OMP_NESTED="false"
-  fi
-  export OMP_NUM_THREADS="${NUM_THREADS}"
-
-  eval $@
-fi
-
diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper
deleted file mode 100755
index fc72bca5b91662b5ad31ac3ecb19c62f0d6c8996..0000000000000000000000000000000000000000
--- a/lib/kokkos/bin/nvcc_wrapper
+++ /dev/null
@@ -1,287 +0,0 @@
-#!/bin/bash
-#
-# This shell script (nvcc_wrapper) wraps both the host compiler and
-# NVCC, if you are building legacy C or C++ code with CUDA enabled.
-# The script remedies some differences between the interface of NVCC
-# and that of the host compiler, in particular for linking.
-# It also means that a legacy code doesn't need separate .cu files;
-# it can just use .cpp files.
-#
-# Default settings: change those according to your machine.  For
-# example, you may have have two different wrappers with either icpc
-# or g++ as their back-end compiler.  The defaults can be overwritten
-# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
-
-default_arch="sm_61"
-#default_arch="sm_50"
-
-#
-# The default C++ compiler.
-#
-host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
-#host_compiler="icpc"
-#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
-#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
-
-#
-# Internal variables
-#
-
-# C++ files
-cpp_files=""
-
-# Host compiler arguments
-xcompiler_args=""
-
-# Cuda (NVCC) only arguments
-cuda_args=""
-
-# Arguments for both NVCC and Host compiler
-shared_args=""
-
-# Linker arguments
-xlinker_args=""
-
-# Object files passable to NVCC
-object_files=""
-
-# Link objects for the host linker only
-object_files_xlinker=""
-
-# Shared libraries with version numbers are not handled correctly by NVCC
-shared_versioned_libraries_host=""
-shared_versioned_libraries=""
-
-# Does the User set the architecture 
-arch_set=0
-
-# Does the user overwrite the host compiler
-ccbin_set=0
-
-#Error code of compilation
-error_code=0
-
-# Do a dry run without actually compiling
-dry_run=0
-
-# Skip NVCC compilation and use host compiler directly
-host_only=0
-
-# Enable workaround for CUDA 6.5 for pragma ident 
-replace_pragma_ident=0
-
-# Mark first host compiler argument
-first_xcompiler_arg=1
-
-temp_dir=${TMPDIR:-/tmp}
-
-# Check if we have an optimization argument already
-optimization_applied=0
-
-#echo "Arguments: $# $@"
-
-while [ $# -gt 0 ]
-do
-  case $1 in
-  #show the executed command
-  --show|--nvcc-wrapper-show)
-    dry_run=1
-    ;;
-  #run host compilation only
-  --host-only)
-    host_only=1
-    ;;
-  #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
-  --replace-pragma-ident)
-    replace_pragma_ident=1
-    ;;
-  #handle source files to be compiled as cuda files
-  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
-    cpp_files="$cpp_files $1"
-    ;;
-   # Ensure we only have one optimization flag because NVCC doesn't allow muliple
-  -O*)
-    if [ $optimization_applied -eq 1 ]; then
-       echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
-    else
-       shared_args="$shared_args $1"
-       optimization_applied=1
-    fi
-    ;;
-  #Handle shared args (valid for both nvcc and the host compiler)
-  -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
-    shared_args="$shared_args $1"
-    ;;
-  #Handle shared args that have an argument
-  -o|-MT)
-    shared_args="$shared_args $1 $2"
-    shift
-    ;;
-  #Handle known nvcc args
-  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
-    cuda_args="$cuda_args $1"
-    ;;
-  #Handle more known nvcc args
-  --expt-extended-lambda|--expt-relaxed-constexpr)
-    cuda_args="$cuda_args $1"
-    ;;
-  #Handle known nvcc args that have an argument
-  -rdc|-maxrregcount|--default-stream)
-    cuda_args="$cuda_args $1 $2"
-    shift
-    ;;
-  #Handle c++11 setting
-  --std=c++11|-std=c++11)
-    shared_args="$shared_args $1"
-    ;;
-  #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-  -std=c++98|--std=c++98)
-    ;;
-  #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-  -pedantic|-Wpedantic|-ansi)
-    ;;
-  #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C"
-  -Woverloaded-virtual)
-    ;;
-  #strip -Xcompiler because we add it
-  -Xcompiler)
-    if [ $first_xcompiler_arg -eq 1 ]; then
-      xcompiler_args="$2"
-      first_xcompiler_arg=0
-    else
-      xcompiler_args="$xcompiler_args,$2"
-    fi
-    shift
-    ;;
-  #strip of "-x cu" because we add that
-  -x)
-    if [[ $2 != "cu" ]]; then
-      if [ $first_xcompiler_arg -eq 1 ]; then
-        xcompiler_args="-x,$2"
-        first_xcompiler_arg=0
-      else
-        xcompiler_args="$xcompiler_args,-x,$2"
-      fi
-    fi
-    shift
-    ;;
-  #Handle -ccbin (if its not set we can set it to a default value)
-  -ccbin)
-    cuda_args="$cuda_args $1 $2"
-    ccbin_set=1
-    host_compiler=$2
-    shift
-    ;;
-  #Handle -arch argument (if its not set use a default
-  -arch*)
-    cuda_args="$cuda_args $1"
-    arch_set=1
-    ;;
-  #Handle -Xcudafe argument
-  -Xcudafe)
-    cuda_args="$cuda_args -Xcudafe $2"
-    shift
-    ;;
-  #Handle args that should be sent to the linker
-  -Wl*)
-    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
-    host_linker_args="$host_linker_args ${1:4:${#1}}"
-    ;;
-  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
-  *.a|*.so|*.o|*.obj)
-    object_files="$object_files $1"
-    object_files_xlinker="$object_files_xlinker -Xlinker $1"
-    ;;
-  #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
-  @*|*.dylib)
-    object_files="$object_files -Xlinker $1"
-    object_files_xlinker="$object_files_xlinker -Xlinker $1"
-    ;;
-  #Handle shared libraries with *.so.* names which nvcc can't do.
-  *.so.*)
-    shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
-    shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
-  ;;
-  #All other args are sent to the host compiler
-  *)
-    if [ $first_xcompiler_arg -eq 1 ]; then
-      xcompiler_args=$1
-      first_xcompiler_arg=0
-    else 
-      xcompiler_args="$xcompiler_args,$1"
-    fi
-    ;;
-  esac
-
-  shift
-done
-
-#Add default host compiler if necessary
-if [ $ccbin_set -ne 1 ]; then
-  cuda_args="$cuda_args -ccbin $host_compiler"
-fi
-
-#Add architecture command
-if [ $arch_set -ne 1 ]; then
-  cuda_args="$cuda_args -arch=$default_arch"
-fi
-
-#Compose compilation command
-nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
-if [ $first_xcompiler_arg -eq 0 ]; then
-  nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
-fi
-
-#Compose host only command
-host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
-
-#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
-if [ $replace_pragma_ident -eq 1 ]; then
-  cpp_files2=""
-  for file in $cpp_files
-  do
-    var=`grep pragma ${file} | grep ident | grep "#"`
-    if [ "${#var}" -gt 0 ]
-    then
-      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
-      cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
-    else
-      cpp_files2="$cpp_files2 $file"
-    fi
-  done
-  cpp_files=$cpp_files2
-  #echo $cpp_files
-fi
-
-if [ "$cpp_files" ]; then
-  nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
-else
-  nvcc_command="$nvcc_command $object_files"
-fi
-
-if [ "$cpp_files" ]; then
-  host_command="$host_command $object_files $cpp_files"
-else
-  host_command="$host_command $object_files"
-fi
-
-#Print command for dryrun
-if [ $dry_run -eq 1 ]; then
-  if [ $host_only -eq 1 ]; then
-    echo $host_command
-  else
-    echo $nvcc_command
-  fi
-  exit 0
-fi
-
-#Run compilation command
-if [ $host_only -eq 1 ]; then
-  $host_command
-else
-  $nvcc_command
-fi
-error_code=$?
-
-#Report error code
-exit $error_code
diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt
index 96b05c02e1fae8d1c4cb9cb914b24786f7b4a1c9..6f9ca897d9f89ec3501db6c9f754d61a9182e511 100644
--- a/lib/kokkos/config/master_history.txt
+++ b/lib/kokkos/config/master_history.txt
@@ -9,3 +9,4 @@ tag:  2.03.00    date: 04:25:2017    master: 120d9ce7    develop: 015ba641
 tag:  2.03.05    date: 05:27:2017    master: 36b92f43    develop: 79073186
 tag:  2.03.13    date: 07:27:2017    master: da314444    develop: 29ccb58a
 tag:  2.04.00    date: 08:16:2017    master: 54eb75c0    develop: 32fb8ee1
+tag:  2.04.04    date: 09:11:2017    master: 2b7e9c20    develop: 51e7b25a
diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper
deleted file mode 100755
index 3506c3fd378eb78167b512f33167f2a830ed1c34..0000000000000000000000000000000000000000
--- a/lib/kokkos/config/nvcc_wrapper
+++ /dev/null
@@ -1,284 +0,0 @@
-#!/bin/bash
-#
-# This shell script (nvcc_wrapper) wraps both the host compiler and
-# NVCC, if you are building legacy C or C++ code with CUDA enabled.
-# The script remedies some differences between the interface of NVCC
-# and that of the host compiler, in particular for linking.
-# It also means that a legacy code doesn't need separate .cu files;
-# it can just use .cpp files.
-#
-# Default settings: change those according to your machine.  For
-# example, you may have have two different wrappers with either icpc
-# or g++ as their back-end compiler.  The defaults can be overwritten
-# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
-
-default_arch="sm_61"
-#default_arch="sm_50"
-
-#
-# The default C++ compiler.
-#
-host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
-#host_compiler="icpc"
-#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
-#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
-
-#
-# Internal variables
-#
-
-# C++ files
-cpp_files=""
-
-# Host compiler arguments
-xcompiler_args=""
-
-# Cuda (NVCC) only arguments
-cuda_args=""
-
-# Arguments for both NVCC and Host compiler
-shared_args=""
-
-# Linker arguments
-xlinker_args=""
-
-# Object files passable to NVCC
-object_files=""
-
-# Link objects for the host linker only
-object_files_xlinker=""
-
-# Shared libraries with version numbers are not handled correctly by NVCC
-shared_versioned_libraries_host=""
-shared_versioned_libraries=""
-
-# Does the User set the architecture 
-arch_set=0
-
-# Does the user overwrite the host compiler
-ccbin_set=0
-
-#Error code of compilation
-error_code=0
-
-# Do a dry run without actually compiling
-dry_run=0
-
-# Skip NVCC compilation and use host compiler directly
-host_only=0
-
-# Enable workaround for CUDA 6.5 for pragma ident 
-replace_pragma_ident=0
-
-# Mark first host compiler argument
-first_xcompiler_arg=1
-
-temp_dir=${TMPDIR:-/tmp}
-
-# Check if we have an optimization argument already
-optimization_applied=0
-
-#echo "Arguments: $# $@"
-
-while [ $# -gt 0 ]
-do
-  case $1 in
-  #show the executed command
-  --show|--nvcc-wrapper-show)
-    dry_run=1
-    ;;
-  #run host compilation only
-  --host-only)
-    host_only=1
-    ;;
-  #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
-  --replace-pragma-ident)
-    replace_pragma_ident=1
-    ;;
-  #handle source files to be compiled as cuda files
-  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
-    cpp_files="$cpp_files $1"
-    ;;
-   # Ensure we only have one optimization flag because NVCC doesn't allow muliple
-  -O*)
-    if [ $optimization_applied -eq 1 ]; then
-       echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
-    else
-       shared_args="$shared_args $1"
-       optimization_applied=1
-    fi
-    ;;
-  #Handle shared args (valid for both nvcc and the host compiler)
-  -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
-    shared_args="$shared_args $1"
-    ;;
-  #Handle shared args that have an argument
-  -o|-MT)
-    shared_args="$shared_args $1 $2"
-    shift
-    ;;
-  #Handle known nvcc args
-  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
-    cuda_args="$cuda_args $1"
-    ;;
-  #Handle more known nvcc args
-  --expt-extended-lambda|--expt-relaxed-constexpr)
-    cuda_args="$cuda_args $1"
-    ;;
-  #Handle known nvcc args that have an argument
-  -rdc|-maxrregcount|--default-stream)
-    cuda_args="$cuda_args $1 $2"
-    shift
-    ;;
-  #Handle c++11 setting
-  --std=c++11|-std=c++11)
-    shared_args="$shared_args $1"
-    ;;
-  #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-  -std=c++98|--std=c++98)
-    ;;
-  #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-  -pedantic|-Wpedantic|-ansi)
-    ;;
-  #strip -Xcompiler because we add it
-  -Xcompiler)
-    if [ $first_xcompiler_arg -eq 1 ]; then
-      xcompiler_args="$2"
-      first_xcompiler_arg=0
-    else
-      xcompiler_args="$xcompiler_args,$2"
-    fi
-    shift
-    ;;
-  #strip of "-x cu" because we add that
-  -x)
-    if [[ $2 != "cu" ]]; then
-      if [ $first_xcompiler_arg -eq 1 ]; then
-        xcompiler_args="-x,$2"
-        first_xcompiler_arg=0
-      else
-        xcompiler_args="$xcompiler_args,-x,$2"
-      fi
-    fi
-    shift
-    ;;
-  #Handle -ccbin (if its not set we can set it to a default value)
-  -ccbin)
-    cuda_args="$cuda_args $1 $2"
-    ccbin_set=1
-    host_compiler=$2
-    shift
-    ;;
-  #Handle -arch argument (if its not set use a default
-  -arch*)
-    cuda_args="$cuda_args $1"
-    arch_set=1
-    ;;
-  #Handle -Xcudafe argument
-  -Xcudafe)
-    cuda_args="$cuda_args -Xcudafe $2"
-    shift
-    ;;
-  #Handle args that should be sent to the linker
-  -Wl*)
-    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
-    host_linker_args="$host_linker_args ${1:4:${#1}}"
-    ;;
-  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
-  *.a|*.so|*.o|*.obj)
-    object_files="$object_files $1"
-    object_files_xlinker="$object_files_xlinker -Xlinker $1"
-    ;;
-  #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
-  *.dylib)
-    object_files="$object_files -Xlinker $1"
-    object_files_xlinker="$object_files_xlinker -Xlinker $1"
-    ;;
-  #Handle shared libraries with *.so.* names which nvcc can't do.
-  *.so.*)
-    shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
-    shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
-  ;;
-  #All other args are sent to the host compiler
-  *)
-    if [ $first_xcompiler_arg -eq 1 ]; then
-      xcompiler_args=$1
-      first_xcompiler_arg=0
-    else 
-      xcompiler_args="$xcompiler_args,$1"
-    fi
-    ;;
-  esac
-
-  shift
-done
-
-#Add default host compiler if necessary
-if [ $ccbin_set -ne 1 ]; then
-  cuda_args="$cuda_args -ccbin $host_compiler"
-fi
-
-#Add architecture command
-if [ $arch_set -ne 1 ]; then
-  cuda_args="$cuda_args -arch=$default_arch"
-fi
-
-#Compose compilation command
-nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
-if [ $first_xcompiler_arg -eq 0 ]; then
-  nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
-fi
-
-#Compose host only command
-host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
-
-#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
-if [ $replace_pragma_ident -eq 1 ]; then
-  cpp_files2=""
-  for file in $cpp_files
-  do
-    var=`grep pragma ${file} | grep ident | grep "#"`
-    if [ "${#var}" -gt 0 ]
-    then
-      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
-      cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
-    else
-      cpp_files2="$cpp_files2 $file"
-    fi
-  done
-  cpp_files=$cpp_files2
-  #echo $cpp_files
-fi
-
-if [ "$cpp_files" ]; then
-  nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
-else
-  nvcc_command="$nvcc_command $object_files"
-fi
-
-if [ "$cpp_files" ]; then
-  host_command="$host_command $object_files $cpp_files"
-else
-  host_command="$host_command $object_files"
-fi
-
-#Print command for dryrun
-if [ $dry_run -eq 1 ]; then
-  if [ $host_only -eq 1 ]; then
-    echo $host_command
-  else
-    echo $nvcc_command
-  fi
-  exit 0
-fi
-
-#Run compilation command
-if [ $host_only -eq 1 ]; then
-  $host_command
-else
-  $nvcc_command
-fi
-error_code=$?
-
-#Report error code
-exit $error_code
diff --git a/lib/kokkos/config/trilinos-integration/checkin-test b/lib/kokkos/config/trilinos-integration/checkin-test
index 92a1b1c06882d3ee73e9c27f5054bd3544acdd0e..ffb565fcbbbb85f881053828d34208bd8e4b9e7e 100644
--- a/lib/kokkos/config/trilinos-integration/checkin-test
+++ b/lib/kokkos/config/trilinos-integration/checkin-test
@@ -1,4 +1,4 @@
 module purge
-module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.59.0/base
+module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.63.0/base sems-yaml_cpp sems-superlu
 
 #Run Trilinos CheckinTest
diff --git a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
index 0408472c680c2c2f46e6cd32c8147a0533c28917..996b6b5610230832a962f2bde624f874bbd540cb 100644
--- a/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
+++ b/lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp
@@ -125,6 +125,123 @@ namespace Impl {
   };
 }
 
+/// \class GraphRowViewConst
+/// \brief View of a row of a sparse graph.
+/// \tparam GraphType Sparse graph type, such as (but not limited to) StaticCrsGraph.
+///
+/// This class provides a generic view of a row of a sparse graph.
+/// We intended this class to view a row of a StaticCrsGraph, but
+/// GraphType need not necessarily be StaticCrsGraph.
+///
+/// The row view is suited for computational kernels that traverse
+/// the column indices of a row, such as sparse matrix-vector
+/// multiply.  The view is always const: it does not allow graph modification.
+///
+/// Here is an example loop over the entries in the row:
+/// \code
+/// typedef typename GraphType::data_type index_type;
+///
+/// GraphRowViewConst<GraphType> G_i = ...;
+/// const index_type numEntries = G_i.length;
+/// for (index_type k = 0; k < numEntries; ++k) {
+///   const index_type j = G_i.colidx (k);
+///   // ... do something with column index j ...
+/// }
+/// \endcode
+///
+/// GraphType must provide the \c data_type and \c entries_type
+/// typedefs. In addition, it must make sense to use GraphRowViewConst to
+/// view a row of GraphType. In particular, column
+/// indices of a row must be accessible using the <tt>colidx_in</tt>
+/// array given to the constructor of this
+/// class, with a constant <tt>stride</tt> between successive entries.
+/// The stride is one for the compressed sparse row storage format (as
+/// is used by StaticCrsGraph), but may be greater than one for other
+/// sparse matrix storage formats (e.g., ELLPACK or jagged diagonal).
+template<class GraphType>
+struct GraphRowViewConst {
+  //! The type of the column indices in the row.
+  typedef const typename GraphType::data_type ordinal_type;
+
+private:
+  //! Array of (local) column indices in the row.
+  ordinal_type* colidx_;
+  /// \brief Stride between successive entries in the row.
+  ///
+  /// For compressed sparse row (CSR) storage, this is always one.
+  /// This might be greater than one for storage formats like ELLPACK
+  /// or Jagged Diagonal.  Nevertheless, the stride can never be
+  /// greater than the number of rows or columns in the matrix.  Thus,
+  /// \c ordinal_type is the correct type.
+  const ordinal_type stride_;
+
+public:
+  /// \brief Constructor
+  ///
+  /// \param colidx_in [in] Array of the row's column indices; its
+  ///   first element is the first entry of the row.
+  /// \param stride [in] (Constant) stride between successive
+  ///   entries of \c colidx_in.
+  /// \param count [in] Number of entries in the row.
+  KOKKOS_INLINE_FUNCTION
+  GraphRowViewConst ( ordinal_type* const colidx_in,
+                      const ordinal_type& stride,
+                      const ordinal_type& count) :
+    colidx_ (colidx_in), stride_ (stride), length (count)
+  {}
+
+  /// \brief Constructor with offset into the \c colidx_in array
+  ///
+  /// \param colidx_in [in] Array of column indices containing the row.
+  /// \param stride [in] (Constant) stride between successive entries
+  ///   of the row within \c colidx_in.
+  /// \param count [in] Number of entries in the row.
+  /// \param idx [in] Start offset of the row in \c colidx_in.
+  ///
+  /// \tparam OffsetType The type of \c idx (see above).  Must be a
+  ///   built-in integer type.  This may differ from ordinal_type.
+  ///   For example, the matrix may have dimensions that fit in int,
+  ///   but a number of entries that does not fit in int.
+  template<class OffsetType>
+  KOKKOS_INLINE_FUNCTION
+  GraphRowViewConst ( const typename GraphType::entries_type& colidx_in,
+                      const ordinal_type& stride,
+                      const ordinal_type& count,
+                      const OffsetType& idx,
+                      const typename std::enable_if<std::is_integral<OffsetType>::value, int>::type& = 0) :
+    colidx_ (&colidx_in(idx)), stride_ (stride), length (count)
+  {}
+
+  /// \brief Number of entries in the row.
+  ///
+  /// This is a public const field rather than a public const method,
+  /// in order to avoid possible overhead of a method call if the
+  /// compiler is unable to inline that method call.
+  ///
+  /// We assume that rows contain no duplicate entries (i.e., entries
+  /// with the same column index).  Thus, a row may have up to
+  /// A.numCols() entries.  This means that the correct type of
+  /// 'length' is ordinal_type.
+  const ordinal_type length;
+
+  /// \brief (Const) reference to the column index of entry i in this
+  ///   row of the sparse matrix.
+  ///
+  /// "Entry i" is not necessarily the entry with column index i, nor
+  /// does i necessarily correspond to the (local) row index.
+  KOKKOS_INLINE_FUNCTION
+  ordinal_type& colidx (const ordinal_type& i) const {
+    return colidx_[i*stride_];
+  }
+
+  /// \brief An alias for colidx
+  KOKKOS_INLINE_FUNCTION
+  ordinal_type& operator()(const ordinal_type& i) const {
+    return colidx(i);
+  }
+};
+
+
 /// \class StaticCrsGraph
 /// \brief Compressed row storage array.
 ///
@@ -218,6 +335,38 @@ public:
       static_cast<size_type> (0);
   }
 
+  /// \brief Return a const view of row i of the graph.
+  ///
+  /// If row i has no entries, return an empty view (no bounds check on i).
+  ///
+  /// The returned object \c view implements the following interface:
+  /// <ul>
+  /// <li> \c view.length is the number of entries in the row </li>
+  /// <li> \c view.colidx(k) returns a const reference to the
+  ///      column index of the k-th entry in the row </li>
+  /// </ul>
+  /// k is not a column index; it just counts from 0 to
+  /// <tt>view.length - 1</tt>.
+  ///
+  /// Users should not rely on the return type of this method.  They
+  /// should instead assign to 'auto'.  That allows compile-time
+  /// polymorphism for different kinds of sparse matrix formats (e.g.,
+  /// ELLPACK or Jagged Diagonal) that we may wish to support in the
+  /// future.
+  KOKKOS_INLINE_FUNCTION
+  GraphRowViewConst<StaticCrsGraph> rowConst (const data_type i) const {
+    const size_type start = row_map(i);
+    // count is guaranteed to fit in ordinal_type, as long as no row
+    // has duplicate entries.
+    const data_type count = static_cast<data_type> (row_map(i+1) - start);
+
+    if (count == 0) {
+      return GraphRowViewConst<StaticCrsGraph> (NULL, 1, 0);
+    } else {
+      return GraphRowViewConst<StaticCrsGraph> (entries, 1, count, start);
+    }
+  }
+
   /**  \brief  Create a row partitioning into a given number of blocks
    *           balancing non-zeros + a fixed cost per row.
    */
diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp
index 46321378d9b4003c14c0165e0ef077e693a0b26a..c184c14d078fc540b84fa1c47cd6fa15122ce8df 100644
--- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp
+++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp
@@ -91,11 +91,11 @@ struct DeviceIterateTile<2,RP,Functor,void >
     // LL
     if (RP::inner_direction == RP::Left) {
       for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-        const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+        const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
         if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
           for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-            const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+            const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
             if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
               m_func(offset_0 , offset_1);
             }
@@ -106,11 +106,11 @@ struct DeviceIterateTile<2,RP,Functor,void >
     // LR
     else {
       for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
               m_func(offset_0 , offset_1);
             }
@@ -143,11 +143,11 @@ struct DeviceIterateTile<2,RP,Functor,Tag>
     if (RP::inner_direction == RP::Left) {
       // Loop over size maxnumblocks until full range covered
       for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-        const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+        const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
         if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
           for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-            const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+            const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
             if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
               m_func(Tag(), offset_0 , offset_1);
             }
@@ -157,11 +157,11 @@ struct DeviceIterateTile<2,RP,Functor,Tag>
     }
     else {
       for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
               m_func(Tag(), offset_0 , offset_1);
             }
@@ -196,15 +196,15 @@ struct DeviceIterateTile<3,RP,Functor,void >
     // LL
     if (RP::inner_direction == RP::Left) {
       for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) {
-        const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z;
+        const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2];
         if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-                const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+                const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
                 if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
                   m_func(offset_0 , offset_1 , offset_2);
                 }
@@ -217,15 +217,15 @@ struct DeviceIterateTile<3,RP,Functor,void >
     // LR
     else {
       for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) {
-                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z;
+                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) {
                   m_func(offset_0 , offset_1 , offset_2);
                 }
@@ -259,15 +259,15 @@ struct DeviceIterateTile<3,RP,Functor,Tag>
   {
     if (RP::inner_direction == RP::Left) {
       for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) {
-        const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z;
+        const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2];
         if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-                const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+                const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
                 if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
                   m_func(Tag(), offset_0 , offset_1 , offset_2);
                 }
@@ -279,15 +279,15 @@ struct DeviceIterateTile<3,RP,Functor,Tag>
     }
     else {
       for ( index_type tile_id0 = (index_type)blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) {
-        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x;
+        const index_type offset_0 = tile_id0*m_rp.m_tile[0] + (index_type)threadIdx.x + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && (index_type)threadIdx.x < m_rp.m_tile[0] ) {
 
           for ( index_type tile_id1 = (index_type)blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y;
+            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && (index_type)threadIdx.y < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id2 = (index_type)blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) {
-                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z;
+                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.z < m_rp.m_tile[2] ) {
                   m_func(Tag(), offset_0 , offset_1 , offset_2);
                 }
@@ -340,19 +340,19 @@ struct DeviceIterateTile<4,RP,Functor,void >
       const index_type thr_id1 = (index_type)threadIdx.x / m_rp.m_tile[0];
 
       for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
-        const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+        const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3];
         if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
 
           for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
-            const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+            const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2];
             if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
 
               for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                 if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                   for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                    const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                    const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                     if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                       m_func(offset_0 , offset_1 , offset_2 , offset_3);
                     }
@@ -378,19 +378,19 @@ struct DeviceIterateTile<4,RP,Functor,void >
       const index_type thr_id1 = (index_type)threadIdx.x % m_rp.m_tile[1];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
-                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
 
                   for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
-                    const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+                    const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
                       m_func(offset_0 , offset_1 , offset_2 , offset_3);
                     }
@@ -442,19 +442,19 @@ struct DeviceIterateTile<4,RP,Functor,Tag>
       const index_type thr_id1 = (index_type)threadIdx.x / m_rp.m_tile[0];
 
       for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
-        const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+        const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3];
         if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
 
           for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
-            const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+            const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2];
             if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
 
               for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                 if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                   for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                    const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                    const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                     if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                       m_func(Tag(), offset_0 , offset_1 , offset_2 , offset_3);
                     }
@@ -479,19 +479,19 @@ struct DeviceIterateTile<4,RP,Functor,Tag>
       const index_type thr_id1 = (index_type)threadIdx.x % m_rp.m_tile[1];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = tile_id1*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1]; // index by loop variable j, not the fixed start tile_id1
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
-                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
 
                   for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
-                    const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+                    const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
                       m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3);
                     }
@@ -558,23 +558,23 @@ struct DeviceIterateTile<5,RP,Functor,void >
       const index_type thr_id3 = (index_type)threadIdx.y / m_rp.m_tile[2];
 
       for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) {
-        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z;
+        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4];
         if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) {
 
           for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-            const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+            const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
             if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                    const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                    const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                     if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                       for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                           m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4);
                         }
@@ -613,23 +613,23 @@ struct DeviceIterateTile<5,RP,Functor,void >
       const index_type thr_id3 = (index_type)threadIdx.y % m_rp.m_tile[3];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                       for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) {
-                        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z;
+                        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4];
                         if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) {
                           m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4);
                         }
@@ -695,23 +695,23 @@ struct DeviceIterateTile<5,RP,Functor,Tag>
       const index_type thr_id3 = (index_type)threadIdx.y / m_rp.m_tile[2];
 
       for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) {
-        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z;
+        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4];
         if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) {
 
           for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-            const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+            const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
             if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                    const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                    const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                     if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                       for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                           m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4);
                         }
@@ -750,23 +750,23 @@ struct DeviceIterateTile<5,RP,Functor,Tag>
       const index_type thr_id3 = (index_type)threadIdx.y % m_rp.m_tile[3];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                       for ( index_type tile_id4 = (index_type)blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) {
-                        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z;
+                        const index_type offset_4 = tile_id4*m_rp.m_tile[4] + (index_type)threadIdx.z + (index_type)m_rp.m_lower[4];
                         if ( offset_4 < m_rp.m_upper[4] && (index_type)threadIdx.z < m_rp.m_tile[4] ) {
                           m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4);
                         }
@@ -845,27 +845,27 @@ struct DeviceIterateTile<6,RP,Functor,void >
       const index_type thr_id5 = (index_type)threadIdx.z / m_rp.m_tile[4];
 
       for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) {
-        const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5;
+        const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5];
         if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) {
 
           for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) {
-            const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4;
+            const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4];
             if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) {
 
               for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                 if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                   for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                    const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                    const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                     if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                       for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                        const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                        const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                         if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                           for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                            const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                            const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                             if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                               m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5);
                             }
@@ -917,27 +917,27 @@ struct DeviceIterateTile<6,RP,Functor,void >
       const index_type thr_id5 = (index_type)threadIdx.z % m_rp.m_tile[5];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                       for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) {
-                        const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4;
+                        const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4];
                         if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) {
 
                           for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) {
-                            const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5;
+                            const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5];
                             if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) {
                               m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5);
                             }
@@ -1016,27 +1016,27 @@ struct DeviceIterateTile<6,RP,Functor,Tag>
       const index_type thr_id5 = (index_type)threadIdx.z / m_rp.m_tile[4];
 
       for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) {
-        const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5;
+        const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5];
         if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) {
 
           for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) {
-            const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4;
+            const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4];
             if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) {
 
               for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                 if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                   for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                    const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                    const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                     if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                       for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-                        const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                        const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
                         if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
                           for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-                            const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                            const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
                             if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
                               m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5);
                             }
@@ -1088,27 +1088,27 @@ struct DeviceIterateTile<6,RP,Functor,Tag>
       const index_type thr_id5 = (index_type)threadIdx.z % m_rp.m_tile[5];
 
       for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
-        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0 + (index_type)m_rp.m_lower[0];
         if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
 
           for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
-            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1 + (index_type)m_rp.m_lower[1];
             if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
 
               for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) {
-                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2;
+                const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2 + (index_type)m_rp.m_lower[2];
                 if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) {
 
                   for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) {
-                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3;
+                    const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3 + (index_type)m_rp.m_lower[3];
                     if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) {
 
                       for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) {
-                        const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4;
+                        const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4 + (index_type)m_rp.m_lower[4];
                         if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) {
 
                           for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) {
-                            const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5;
+                            const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5 + (index_type)m_rp.m_lower[5];
                             if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) {
                               m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5);
                             }
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
index cae8ecd489f7917fd3ccc1c0f6628000f6351773..079d9f0889b4e36090593d5458a884545c7635ae 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
@@ -164,7 +164,7 @@ static void cuda_parallel_launch_constant_memory()
 
 template< class DriverType, unsigned int maxTperB, unsigned int minBperSM >
 __global__
-__launch_bounds__(maxTperB, minBperSM)
+//__launch_bounds__(maxTperB, minBperSM)
 static void cuda_parallel_launch_constant_memory()
 {
   const DriverType & driver =
@@ -182,7 +182,7 @@ static void cuda_parallel_launch_local_memory( const DriverType driver )
 
 template< class DriverType, unsigned int maxTperB, unsigned int minBperSM >
 __global__
-__launch_bounds__(maxTperB, minBperSM)
+//__launch_bounds__(maxTperB, minBperSM)
 static void cuda_parallel_launch_local_memory( const DriverType driver )
 {
   driver();
diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp
index 26b47a8b749e7340692e5d9a6a13273cb0e0f8f4..f8355f0d069f6399db0fc78e59f68d10e9e183ed 100644
--- a/lib/kokkos/core/src/Kokkos_Complex.hpp
+++ b/lib/kokkos/core/src/Kokkos_Complex.hpp
@@ -242,45 +242,89 @@ public:
     re_ = v;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator += (const complex<RealType>& src) {
+  complex<RealType>&
+  operator += (const complex<InputRealType>& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ += src.re_;
     im_ += src.im_;
     return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  void operator += (const volatile complex<RealType>& src) volatile {
+  void
+  operator += (const volatile complex<InputRealType>& src) volatile {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ += src.re_;
     im_ += src.im_;
   }
 
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator += (const RealType& src) {
-    re_ += src;
+  complex<RealType>&
+  operator += (const std::complex<RealType>& src) {
+    re_ += src.real();
+    im_ += src.imag();
     return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  void operator += (const volatile RealType& src) volatile {
+  complex<RealType>&
+  operator += (const InputRealType& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ += src;
+    return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator -= (const complex<RealType>& src) {
+  void
+  operator += (const volatile InputRealType& src) volatile {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
+    re_ += src;
+  }
+  
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  complex<RealType>&
+  operator -= (const complex<InputRealType>& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ -= src.re_;
     im_ -= src.im_;
     return *this;
   }
 
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator -= (const RealType& src) {
+  complex<RealType>&
+  operator -= (const std::complex<RealType>& src) {
+    re_ -= src.real();
+    im_ -= src.imag();
+    return *this;
+  }
+
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  complex<RealType>&
+  operator -= (const InputRealType& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ -= src;
     return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator *= (const complex<RealType>& src) {
+  complex<RealType>&
+  operator *= (const complex<InputRealType>& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     const RealType realPart = re_ * src.re_ - im_ * src.im_;
     const RealType imagPart = re_ * src.im_ + im_ * src.re_;
     re_ = realPart;
@@ -288,8 +332,12 @@ public:
     return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  void operator *= (const volatile complex<RealType>& src) volatile {
+  void
+  operator *= (const volatile complex<InputRealType>& src) volatile {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     const RealType realPart = re_ * src.re_ - im_ * src.im_;
     const RealType imagPart = re_ * src.im_ + im_ * src.re_;
     re_ = realPart;
@@ -297,20 +345,70 @@ public:
   }
 
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator *= (const RealType& src) {
+  complex<RealType>&
+  operator *= (const std::complex<RealType>& src) {
+    const RealType realPart = re_ * src.real() - im_ * src.imag();
+    const RealType imagPart = re_ * src.imag() + im_ * src.real();
+    re_ = realPart;
+    im_ = imagPart;
+    return *this;
+  }
+
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  complex<RealType>&
+  operator *= (const InputRealType& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ *= src;
     im_ *= src;
     return *this;
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  void operator *= (const volatile RealType& src) volatile {
+  void
+  operator *= (const volatile InputRealType& src) volatile {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
     re_ *= src;
     im_ *= src;
   }
 
+  //! Divide *this in place by y; y's real type need only be convertible to RealType.
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  complex<RealType>&
+  operator /= (const complex<InputRealType>& y) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value,
+                  "InputRealType must be convertible to RealType");
+    // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
+    // If the real part is +/-Inf and the imaginary part is -/+Inf,
+    // this won't change the result.
+    const RealType s = std::fabs (y.real ()) + std::fabs (y.imag ());
+    // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
+    // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
+    // because y/s is NaN.
+    if (s == 0.0) {
+      this->re_ /= s;
+      this->im_ /= s;
+    }
+    else {
+      // y may be another complex<> instantiation: read it via real()/imag(), as for s above.
+      const complex<RealType> x_scaled (this->re_ / s, this->im_ / s);
+      const complex<RealType> y_conj_scaled (y.real () / s, -(y.imag ()) / s);
+      const RealType y_scaled_abs = y_conj_scaled.re_ * y_conj_scaled.re_ +
+        y_conj_scaled.im_ * y_conj_scaled.im_; // abs(y) == abs(conj(y))
+      *this = x_scaled * y_conj_scaled;
+      *this /= y_scaled_abs;
+    }
+    return *this;
+  }
+  
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator /= (const complex<RealType>& y) {
+  complex<RealType>&
+  operator /= (const std::complex<RealType>& y) {
+
     // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
     // If the real part is +/-Inf and the imaginary part is -/+Inf,
     // this won't change the result.
@@ -334,57 +432,95 @@ public:
     return *this;
   }
 
+
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  complex<RealType>& operator /= (const RealType& src) {
+  complex<RealType>&
+  operator /= (const InputRealType& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
+
     re_ /= src;
     im_ /= src;
     return *this;
   }
 
+  //! True iff both parts of src, converted to RealType, equal the parts of *this.
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  bool
+  operator == (const complex<InputRealType>& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value,
+                  "InputRealType must be convertible to RealType");
+    return (re_ == static_cast<RealType>(src.real ())) && (im_ == static_cast<RealType>(src.imag ()));
+  }
+
   KOKKOS_INLINE_FUNCTION
-  bool operator == (const complex<RealType>& src) {
-    return (re_ == src.re_) && (im_ == src.im_);
+  bool
+  operator == (const std::complex<RealType>& src) {
+    return (re_ == src.real()) && (im_ == src.imag());
   }
 
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  bool operator == (const RealType src) {
-    return (re_ == src) && (im_ == RealType(0));
+  bool
+  operator == (const InputRealType src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
+
+    return (re_ == static_cast<RealType>(src)) && (im_ == RealType(0));
   }
 
+  //! True iff either part of src, converted to RealType, differs from that of *this.
+  template<typename InputRealType>
   KOKKOS_INLINE_FUNCTION
-  bool operator != (const complex<RealType>& src) {
-    return (re_ != src.re_) || (im_ != src.im_);
+  bool
+  operator != (const complex<InputRealType>& src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value,
+                  "InputRealType must be convertible to RealType");
+    return (re_ != static_cast<RealType>(src.real ())) || (im_ != static_cast<RealType>(src.imag ()));
   }
 
   KOKKOS_INLINE_FUNCTION
-  bool operator != (const RealType src) {
-    return (re_ != src) || (im_ != RealType(0));
+  bool
+  operator != (const std::complex<RealType>& src) {
+    return (re_ != src.real()) || (im_ != src.imag());
   }
 
+  template<typename InputRealType>
+  KOKKOS_INLINE_FUNCTION
+  bool
+  operator != (const InputRealType src) {
+    static_assert(std::is_convertible<InputRealType,RealType>::value, 
+                  "InputRealType must be convertible to RealType");
+
+    return (re_ != static_cast<RealType>(src)) || (im_ != RealType(0));
+  }
+  
 };
 
 //! Binary + operator for complex complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator + (const complex<RealType>& x, const complex<RealType>& y) {
-  return complex<RealType> (x.real () + y.real (), x.imag () + y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator + (const complex<RealType1>& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type > (x.real () + y.real (), x.imag () + y.imag ());
 }
 
 //! Binary + operator for complex scalar.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator + (const complex<RealType>& x, const RealType& y) {
-  return complex<RealType> (x.real () + y , x.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator + (const complex<RealType1>& x, const RealType2& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () + y , x.imag ());
 }
 
 //! Binary + operator for scalar complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator + (const RealType& x, const complex<RealType>& y) {
-  return complex<RealType> (x + y.real (), y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator + (const RealType1& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x + y.real (), y.imag ());
 }
 
 //! Unary + operator for complex.
@@ -396,27 +532,27 @@ operator + (const complex<RealType>& x) {
 }
 
 //! Binary - operator for complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator - (const complex<RealType>& x, const complex<RealType>& y) {
-  return complex<RealType> (x.real () - y.real (), x.imag () - y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator - (const complex<RealType1>& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () - y.real (), x.imag () - y.imag ());
 }
 
 //! Binary - operator for complex scalar.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator - (const complex<RealType>& x, const RealType& y) {
-  return complex<RealType> (x.real () - y , x.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator - (const complex<RealType1>& x, const RealType2& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () - y , x.imag ());
 }
 
 //! Binary - operator for scalar complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator - (const RealType& x, const complex<RealType>& y) {
-  return complex<RealType> (x - y.real (), - y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator - (const RealType1& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x - y.real (), - y.imag ());
 }
 
 //! Unary - operator for complex.
@@ -428,12 +564,12 @@ operator - (const complex<RealType>& x) {
 }
 
 //! Binary * operator for complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator * (const complex<RealType>& x, const complex<RealType>& y) {
-  return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (),
-                            x.real () * y.imag () + x.imag () * y.real ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator * (const complex<RealType1>& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () * y.real () - x.imag () * y.imag (),
+                                                                        x.real () * y.imag () + x.imag () * y.real ());
 }
 
 /// \brief Binary * operator for std::complex and complex.
@@ -446,33 +582,34 @@ operator * (const complex<RealType>& x, const complex<RealType>& y) {
 /// This function cannot be called in a CUDA device function, because
 /// std::complex's methods and nonmember functions are not marked as
 /// CUDA device functions.
-template<class RealType>
-complex<RealType>
-operator * (const std::complex<RealType>& x, const complex<RealType>& y) {
-  return complex<RealType> (x.real () * y.real () - x.imag () * y.imag (),
-                            x.real () * y.imag () + x.imag () * y.real ());
+template<class RealType1, class RealType2>
+inline
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator * (const std::complex<RealType1>& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x.real () * y.real () - x.imag () * y.imag (),
+                                                                        x.real () * y.imag () + x.imag () * y.real ());
 }
 
 /// \brief Binary * operator for RealType times complex.
 ///
 /// This function exists because the compiler doesn't know that
 /// RealType and complex<RealType> commute with respect to operator*.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator * (const RealType& x, const complex<RealType>& y) {
-  return complex<RealType> (x * y.real (), x * y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator * (const RealType1& x, const complex<RealType2>& y) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x * y.real (), x * y.imag ());
 }
 
 /// \brief Binary * operator for RealType times complex.
 ///
 /// This function exists because the compiler doesn't know that
 /// RealType and complex<RealType> commute with respect to operator*.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator * (const complex<RealType>& y, const RealType& x) {
-  return complex<RealType> (x * y.real (), x * y.imag ());
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator * (const complex<RealType1>& y, const RealType2& x) {
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x * y.real (), x * y.imag ());
 }
 
 //! Imaginary part of a complex number.
@@ -539,33 +676,34 @@ complex<RealType> pow (const complex<RealType>& x) {
 //! Binary operator / for complex and real numbers
 template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType1>
+complex<typename std::common_type<RealType1,RealType2>::type>
 operator / (const complex<RealType1>& x, const RealType2& y) {
-  return complex<RealType1> (real (x) / y, imag (x) / y);
+  return complex<typename std::common_type<RealType1,RealType2>::type> (real (x) / y, imag (x) / y);
 }
 
 //! Binary operator / for complex.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType>
-operator / (const complex<RealType>& x, const complex<RealType>& y) {
+complex<typename std::common_type<RealType1,RealType2>::type>
+operator / (const complex<RealType1>& x, const complex<RealType2>& y) {
   // Scale (by the "1-norm" of y) to avoid unwarranted overflow.
   // If the real part is +/-Inf and the imaginary part is -/+Inf,
   // this won't change the result.
-  const RealType s = std::fabs (real (y)) + std::fabs (imag (y));
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  const common_real_type s = std::fabs (real (y)) + std::fabs (imag (y));
 
   // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
   // In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
   // because y/s is NaN.
   if (s == 0.0) {
-    return complex<RealType> (real (x) / s, imag (x) / s);
+    return complex<common_real_type> (real (x) / s, imag (x) / s);
   }
   else {
-    const complex<RealType> x_scaled (real (x) / s, imag (x) / s);
-    const complex<RealType> y_conj_scaled (real (y) / s, -imag (y) / s);
-    const RealType y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) +
+    const complex<common_real_type> x_scaled (real (x) / s, imag (x) / s);
+    const complex<common_real_type> y_conj_scaled (real (y) / s, -imag (y) / s);
+    const common_real_type y_scaled_abs = real (y_conj_scaled) * real (y_conj_scaled) +
       imag (y_conj_scaled) * imag (y_conj_scaled); // abs(y) == abs(conj(y))
-    complex<RealType> result = x_scaled * y_conj_scaled;
+    complex<common_real_type> result = x_scaled * y_conj_scaled;
     result /= y_scaled_abs;
     return result;
   }
@@ -574,16 +712,19 @@ operator / (const complex<RealType>& x, const complex<RealType>& y) {
 //! Binary operator / for complex and real numbers
 template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-complex<RealType1>
+complex<typename std::common_type<RealType1,RealType2>::type>
 operator / (const RealType1& x, const complex<RealType2>& y) {
-  return complex<RealType1> (x)/y;
+  return complex<typename std::common_type<RealType1,RealType2>::type> (x)/y;
 }
 
 //! Equality operator for two complex numbers.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator == (const complex<RealType>& x, const complex<RealType>& y) {
-  return real (x) == real (y) && imag (x) == imag (y);
+bool
+operator == (const complex<RealType1>& x, const complex<RealType2>& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(real (x)) == static_cast<common_real_type>(real (y)) && 
+           static_cast<common_real_type>(imag (x)) == static_cast<common_real_type>(imag (y)) );
 }
 
 /// \brief Equality operator for std::complex and Kokkos::complex.
@@ -592,50 +733,68 @@ bool operator == (const complex<RealType>& x, const complex<RealType>& y) {
 /// Otherwise, CUDA builds will give compiler warnings ("warning:
 /// calling a constexpr __host__ function("real") from a __host__
 /// __device__ function("operator==") is not allowed").
-template<class RealType>
-bool operator == (const std::complex<RealType>& x, const complex<RealType>& y) {
-  return std::real (x) == real (y) && std::imag (x) == imag (y);
-}
-
+template<class RealType1, class RealType2>
+inline
+bool
+operator == (const std::complex<RealType1>& x, const complex<RealType2>& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(std::real (x)) == static_cast<common_real_type>(real (y)) && 
+           static_cast<common_real_type>(std::imag (x)) == static_cast<common_real_type>(imag (y)) );
+}
+  
 //! Equality operator for complex and real number.
 template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator == (const complex<RealType1>& x, const RealType2& y) {
-  return real (x) == y && imag (x) == static_cast<RealType1> (0.0);
+bool
+operator == (const complex<RealType1>& x, const RealType2& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(real (x)) == static_cast<common_real_type>(y) && 
+           static_cast<common_real_type>(imag (x)) == static_cast<common_real_type>(0.0) );
 }
 
 //! Equality operator for real and complex number.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator == (const RealType& x, const complex<RealType>& y) {
+bool
+operator == (const RealType1& x, const complex<RealType2>& y) {
   return y == x;
 }
 
 //! Inequality operator for two complex numbers.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator != (const complex<RealType>& x, const complex<RealType>& y) {
-  return real (x) != real (y) || imag (x) != imag (y);
+bool
+operator != (const complex<RealType1>& x, const complex<RealType2>& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(real (x)) != static_cast<common_real_type>(real (y)) || 
+           static_cast<common_real_type>(imag (x)) != static_cast<common_real_type>(imag (y)) );
 }
 
 //! Inequality operator for std::complex and Kokkos::complex.
-template<class RealType>
-KOKKOS_INLINE_FUNCTION
-bool operator != (const std::complex<RealType>& x, const complex<RealType>& y) {
-  return std::real (x) != real (y) || std::imag (x) != imag (y);
+template<class RealType1, class RealType2>
+inline
+bool
+operator != (const std::complex<RealType1>& x, const complex<RealType2>& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(std::real (x)) != static_cast<common_real_type>(real (y)) || 
+           static_cast<common_real_type>(std::imag (x)) != static_cast<common_real_type>(imag (y)) );
 }
 
 //! Inequality operator for complex and real number.
 template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator != (const complex<RealType1>& x, const RealType2& y) {
-  return real (x) != y || imag (x) != static_cast<RealType1> (0.0);
+bool
+operator != (const complex<RealType1>& x, const RealType2& y) {
+  typedef typename std::common_type<RealType1,RealType2>::type common_real_type;
+  return ( static_cast<common_real_type>(real (x)) != static_cast<common_real_type>(y) || 
+           static_cast<common_real_type>(imag (x)) != static_cast<common_real_type>(0.0) );
 }
 
 //! Inequality operator for real and complex number.
-template<class RealType>
+template<class RealType1, class RealType2>
 KOKKOS_INLINE_FUNCTION
-bool operator != (const RealType& x, const complex<RealType>& y) {
+bool
+operator != (const RealType1& x, const complex<RealType2>& y) {
   return y != x;
 }
 
diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp
index f089c16ad2ca3c71ad7cdf2042e19bee93a7e0ed..b9c131cd7ab8077ad75201be2c6510abc4921633 100644
--- a/lib/kokkos/core/src/Kokkos_Crs.hpp
+++ b/lib/kokkos/core/src/Kokkos_Crs.hpp
@@ -353,7 +353,14 @@ struct CountAndFill {
   struct Fill {};
   KOKKOS_INLINE_FUNCTION void operator()(Fill, size_type i) const {
     auto j = m_crs.row_map(i);
-    data_type* fill = &(m_crs.entries(j));
+    /* We don't want to access entries(entries.size()), even if it's just to get its
+       address and never use it.
+       This can happen when row (i) is empty and all rows after it are also empty.
+       We could compare to row_map(i + 1), but that is a read from global memory,
+       whereas dimension_0() should be part of the View in registers (or constant memory). */
+    data_type* fill =
+      (j == static_cast<decltype(j)>(m_crs.entries.dimension_0())) ?
+      nullptr : (&(m_crs.entries(j)));
     m_functor(i, fill);
   }
   using self_type = CountAndFill<CrsType, Functor>;
diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
index 9c9af0dd8b8624b98ebfd2fbcefc8bfa613c387f..b811751a2c09c91bb75252a223cd3efa02dfbe31 100644
--- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp
@@ -147,12 +147,11 @@ public:
                  , const size_t arg_alloc_size ) const;
 
   /**\brief Return Name of the MemorySpace */
-  static constexpr const char* name();
+  static constexpr const char* name() { return "HBW"; }
 
 private:
 
   AllocationMechanism  m_alloc_mech;
-  static constexpr const char* m_name = "HBW";
   friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void >;
 };
 
diff --git a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp
index 339571941d8824b77981b36ed32f9d3b131bad78..a825fd54d3918c93e3f84dc6b5cdcb443daf374c 100644
--- a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp
+++ b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp
@@ -192,7 +192,7 @@ template<>
 struct reduction_identity<float> {
   KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum()  {return static_cast<float>(0.0f);}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() {return static_cast<float>(1.0f);}
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max()  {return FLT_MIN;}
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static float max()  {return -FLT_MAX;}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static float min()  {return FLT_MAX;}
 };
 
@@ -200,7 +200,7 @@ template<>
 struct reduction_identity<double> {
   KOKKOS_FORCEINLINE_FUNCTION constexpr static double sum()  {return static_cast<double>(0.0);}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static double prod() {return static_cast<double>(1.0);}
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static double max()  {return DBL_MIN;}
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static double max()  {return -DBL_MAX;}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static double min()  {return DBL_MAX;}
 };
 
@@ -208,7 +208,7 @@ template<>
 struct reduction_identity<long double> {
   KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum()  {return static_cast<long double>(0.0);}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static long double prod() {return static_cast<long double>(1.0);}
-  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max()  {return LDBL_MIN;}
+  KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max()  {return -LDBL_MAX;}
   KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min()  {return LDBL_MAX;}
 };
 
diff --git a/lib/kokkos/core/src/Kokkos_ROCm.hpp b/lib/kokkos/core/src/Kokkos_ROCm.hpp
index b13b0b01dea588e3ddf2fd57a7be5b24005d4498..0118d4667e30825dbcb428a91445cb1d42532b48 100644
--- a/lib/kokkos/core/src/Kokkos_ROCm.hpp
+++ b/lib/kokkos/core/src/Kokkos_ROCm.hpp
@@ -211,6 +211,24 @@ struct VerifyExecutionCanAccessMemorySpace
 } // namespace Kokkos
 
 
+
+#define threadIdx_x (hc_get_workitem_id(0))
+#define threadIdx_y (hc_get_workitem_id(1))
+#define threadIdx_z (hc_get_workitem_id(2))
+
+#define blockIdx_x  (hc_get_group_id(0))
+#define blockIdx_y  (hc_get_group_id(1))
+#define blockIdx_z  (hc_get_group_id(2))
+
+#define blockDim_x  (hc_get_group_size(0))
+#define blockDim_y  (hc_get_group_size(1))
+#define blockDim_z  (hc_get_group_size(2))
+
+#define gridDim_x   (hc_get_num_groups(0))
+#define gridDim_y   (hc_get_num_groups(1))
+#define gridDim_z   (hc_get_num_groups(2))
+
+
 #include <ROCm/Kokkos_ROCm_Parallel.hpp>
 #include <ROCm/Kokkos_ROCm_Task.hpp>
 
diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile
index 8fb13b89549b52ef790c6c1488321e9df18d3b41..a917cf16560a32ab060043854a89f1e662820974 100644
--- a/lib/kokkos/core/src/Makefile
+++ b/lib/kokkos/core/src/Makefile
@@ -88,6 +88,7 @@ build-makefile-kokkos:
 	echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos
 	echo "" >> Makefile.kokkos
 	echo "#Variables used in application Makefiles" >> Makefile.kokkos
+	echo "KOKKOS_OS = $(KOKKOS_OS)" >> Makefile.kokkos
 	echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos
 	echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos
 	echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
index 37d2ac831801ac35feca1e250f19486ff842524e..de84f6e59fed9f300b2d2bde514308d7a272187d 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp
@@ -211,6 +211,7 @@ void OpenMP::partition_master( F const& f
                                                  , thread_local_bytes
                                                  );
 
+      omp_set_num_threads(partition_size);
       f( omp_get_thread_num(), omp_get_num_threads() );
 
       Impl::t_openmp_instance->~Exec();
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
index 0b7a1e2583d1b88953da2524885317662b3cbd99..f2674e592951a3faefe4b8d3c4a80bb2eb325ea6 100644
--- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp
@@ -113,7 +113,6 @@ void reduce_enqueue(
 
   if (output_length < 1) return;
 
-  assert(output_result != nullptr);
   const auto td = get_tile_desc<T>(szElements,output_length,team_size,vector_size, shared_size);
 
   // allocate host and device memory for the results from each team
@@ -176,14 +175,17 @@ void reduce_enqueue(
       }
       
   });
-  ValueInit::init(ReducerConditional::select(f, reducer), output_result);
+  if (output_result != nullptr)
+     ValueInit::init(ReducerConditional::select(f, reducer), output_result);
   fut.wait();
 
   copy(result,result_cpu.data());
-  for(std::size_t i=0;i<td.num_tiles;i++)
-    ValueJoin::join(ReducerConditional::select(f, reducer), output_result, result_cpu.data()+i*output_length);
+  if (output_result != nullptr) {
+    for(std::size_t i=0;i<td.num_tiles;i++)
+       ValueJoin::join(ReducerConditional::select(f, reducer), output_result, result_cpu.data()+i*output_length);
 
-  ValueFinal::final( ReducerConditional::select(f, reducer) , output_result );
+    ValueFinal::final( ReducerConditional::select(f, reducer) , output_result );
+  }
 
 }
 
diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
index acf75f6f1366fbc618e075ded29e52f7aa1a742c..c2e85ad11279dc329ec85e0e7e08e972abf52f22 100644
--- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
+++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Scan.hpp
@@ -67,7 +67,7 @@ void scan_enqueue(
     hc::array<value_type> result(td.num_tiles);
     hc::array<value_type> scratch(len);
 
-    tile_for<value_type>(td, [&,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] 
+    tile_for<value_type>(td, [&,f,len,td](hc::tiled_index<1> t_idx, tile_buffer<value_type> buffer) [[hc]] 
     {
         const auto local = t_idx.local[0];
         const auto global = t_idx.global[0];
@@ -135,7 +135,7 @@ void scan_enqueue(
       ValueJoin::join(f, &result_cpu[i], &result_cpu[i-1]);
 
     copy(result_cpu.data(),result);
-    hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,len,td](hc::tiled_index<1> t_idx) [[hc]] 
+    hc::parallel_for_each(hc::extent<1>(len).tile(td.tile_size), [&,f,len,td](hc::tiled_index<1> t_idx) [[hc]] 
     {
 //        const auto local = t_idx.local[0];
         const auto global = t_idx.global[0];
diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
index 3d3029535e9c770b65cbe07af5f6256fd01ada0d..c5e73c8b26b0d327c6ee40fd6ad429b0a91e8f50 100644
--- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
+++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp
@@ -68,6 +68,8 @@ int bit_first_zero( unsigned i ) noexcept
   return full != i ? _bit_scan_forward( ~i ) : -1 ;
 #elif defined( KOKKOS_COMPILER_IBM )
   return full != i ? __cnttz4( ~i ) : -1 ;
+#elif defined( KOKKOS_COMPILER_CRAYC )
+  return full != i ? _popcnt( i ^ (i+1) ) - 1 : -1 ;
 #elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ )
   return full != i ? __builtin_ffs( ~i ) - 1 : -1 ;
 #else
@@ -90,17 +92,16 @@ int bit_scan_forward( unsigned i )
   return _bit_scan_forward(i);
 #elif defined( KOKKOS_COMPILER_IBM )
   return __cnttz4(i);
+#elif defined( KOKKOS_COMPILER_CRAYC )
+  return i ? _popcnt(~i & (i-1)) : -1;
 #elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ )
   return __builtin_ffs(i) - 1;
 #else
-  unsigned t = 1u;
-  int r = 0;
-  while ( i && ( i & t == 0 ) )
-  {
-    t = t << 1;
-    ++r;
+  int offset = -1; // result when i == 0 (no bit set)
+  if ( i ) { // scan upward from bit 0 to the lowest set bit
+    for ( offset = 0 ; (i & ( 1u << offset ) ) == 0 ; ++offset ); // 1u keeps the mask unsigned
   }
-  return r;
+  return offset;
 #endif
 }
 
@@ -116,17 +117,16 @@ int bit_scan_reverse( unsigned i )
   return _bit_scan_reverse(i);
 #elif defined( KOKKOS_COMPILER_IBM )
   return shift - __cntlz4(i);
+#elif defined( KOKKOS_COMPILER_CRAYC )
+  return i ? shift - _leadz32(i) : 0 ;
 #elif defined( __GNUC__ ) || defined( __GNUG__ )
   return shift - __builtin_clz(i);
 #else
-  unsigned t = 1u << shift;
-  int r = 0;
-  while ( i && ( i & t == 0 ) )
-  {
-    t = t >> 1;
-    ++r;
+  int offset = 0; // result when i == 0
+  if ( i ) { // scan downward from bit 'shift' to the highest set bit
+    for ( offset = shift ; (i & ( 1u << offset ) ) == 0 ; --offset ); // 1u keeps the mask unsigned
   }
-  return r;
+  return offset;
 #endif
 }
 
@@ -142,6 +142,8 @@ int bit_count( unsigned i )
   return _popcnt32(i);
 #elif defined( KOKKOS_COMPILER_IBM )
   return __popcnt4(i);
+#elif defined( KOKKOS_COMPILER_CRAYC )
+  return _popcnt(i);
 #elif defined( __GNUC__ ) || defined( __GNUG__ )
   return __builtin_popcount(i);
 #else
diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
index e11f8b6d346491f75fe0e18f0bda85385233907e..cd0553218d0753bf9185fd9a91014b47ea68a80b 100644
--- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp
@@ -166,10 +166,6 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s
   }
 }
 
-constexpr const char* HBWSpace::name() {
-  return m_name;
-}
-
 } // namespace Experimental
 } // namespace Kokkos
 
diff --git a/lib/kokkos/core/unit_test/TestComplex.hpp b/lib/kokkos/core/unit_test/TestComplex.hpp
index ce5537fed362a43eac1b57c8f63a06d7329c1ff4..c7f681699e40ae1b921ce0d2ef1dcf0f31c1424c 100644
--- a/lib/kokkos/core/unit_test/TestComplex.hpp
+++ b/lib/kokkos/core/unit_test/TestComplex.hpp
@@ -114,7 +114,7 @@ struct TestComplexBasicMath {
   typename Kokkos::View<Kokkos::complex<double>*,ExecSpace>::HostMirror h_results;
 
   void testit () {
-    d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexBasicMath",20);
+    d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexBasicMath",24);
     h_results = Kokkos::create_mirror_view(d_results);
 
     Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0,1), *this);
@@ -125,6 +125,7 @@ struct TestComplexBasicMath {
     std::complex<double> b(3.25,5.75);
     std::complex<double> d(1.0,2.0);
     double c = 9.3;
+    int e = 2;
 
     std::complex<double> r;
     r = a+b; ASSERT_FLOAT_EQ(h_results(0).real(),  r.real()); ASSERT_FLOAT_EQ(h_results(0).imag(),  r.imag());
@@ -147,6 +148,12 @@ struct TestComplexBasicMath {
     r = c-a; ASSERT_FLOAT_EQ(h_results(17).real(), r.real()); ASSERT_FLOAT_EQ(h_results(17).imag(), r.imag());
     r = c*a; ASSERT_FLOAT_EQ(h_results(18).real(), r.real()); ASSERT_FLOAT_EQ(h_results(18).imag(), r.imag());
     r = c/a; ASSERT_FLOAT_EQ(h_results(19).real(), r.real()); ASSERT_FLOAT_EQ(h_results(19).imag(), r.imag());
+
+    r = a; // expected values for the int operand e are built componentwise from r
+    /* r = a+e; */ ASSERT_FLOAT_EQ(h_results(20).real(),  r.real()+e); ASSERT_FLOAT_EQ(h_results(20).imag(),  r.imag());
+    /* r = a-e; */ ASSERT_FLOAT_EQ(h_results(21).real(),  r.real()-e); ASSERT_FLOAT_EQ(h_results(21).imag(),  r.imag());
+    /* r = a*e; */ ASSERT_FLOAT_EQ(h_results(22).real(),  r.real()*e); ASSERT_FLOAT_EQ(h_results(22).imag(),  r.imag()*e);
+    /* r = a/e; */ ASSERT_FLOAT_EQ(h_results(23).real(),  r.real()/e); ASSERT_FLOAT_EQ(h_results(23).imag(),  r.imag()/e);
   }
 
   KOKKOS_INLINE_FUNCTION
@@ -190,6 +197,12 @@ struct TestComplexBasicMath {
     d_results(17) = c-a;
     d_results(18) = c*a;
     d_results(19) = c/a;
+
+    int e = 2;
+    d_results(20) = a+e;
+    d_results(21) = a-e;
+    d_results(22) = a*e;
+    d_results(23) = a/e;
   }
 };
 
diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp
index f579ddf02c8242a43931c5177cfc3cad58c45078..fbc3a65c2fbf3eb3aea2127a17495dd81fb2f5c7 100644
--- a/lib/kokkos/core/unit_test/TestMDRange.hpp
+++ b/lib/kokkos/core/unit_test/TestMDRange.hpp
@@ -286,7 +286,9 @@ struct TestMDRange_2D {
     // Test with reducers - scalar
     {
       typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> > range_type;
-      range_type range( {{ 0, 0 }}, {{ N0, N1 }}, {{ 3, 3 }} );
+      int s0 = 1;
+      int s1 = 1;
+      range_type range( {{ s0, s1 }}, {{ N0, N1 }}, {{ 3, 3 }} );
 
       TestMDRange_2D functor( N0, N1 );
 
@@ -297,7 +299,7 @@ struct TestMDRange_2D {
 
       parallel_reduce( range, functor, reducer_scalar );
 
-      ASSERT_EQ( sum, 2 * N0 * N1 );
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) );
     }
     // Test with reducers - scalar view
     {
@@ -445,7 +447,9 @@ struct TestMDRange_2D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } );
+      const int s0 = 1;
+      const int s1 = 1;
+      range_type range( point_type{ { s0, s1 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } );
       TestMDRange_2D functor( N0, N1 );
 
       parallel_for( range, functor );
@@ -454,8 +458,8 @@ struct TestMDRange_2D {
       Kokkos::deep_copy( h_view, functor.input_view );
 
       int counter = 0;
-      for ( int i = 0; i < N0; ++i )
-      for ( int j = 0; j < N1; ++j )
+      for ( int i = s0; i < N0; ++i )
+      for ( int j = s1; j < N1; ++j )
       {
         if ( h_view( i, j ) != 3 ) {
           ++counter;
@@ -463,7 +467,7 @@ struct TestMDRange_2D {
       }
 
       if ( counter != 0 ) {
-        printf( "Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter );
+        printf( "Offset Start + Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter );
       }
 
       ASSERT_EQ( counter, 0 );
@@ -699,6 +703,7 @@ struct TestMDRange_2D {
 
       ASSERT_EQ( counter, 0 );
     }
+
   } // end test_for2
 }; // MDRange_2D
 
@@ -749,7 +754,10 @@ struct TestMDRange_3D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      range_type range( point_type{ { s0, s1, s2 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } );
 
       TestMDRange_3D functor( N0, N1, N2 );
 
@@ -757,7 +765,7 @@ struct TestMDRange_3D {
       double sum = 0.0;
       parallel_reduce( range, functor, sum );
 
-      ASSERT_EQ( sum, 2 * N0 * N1 * N2 );
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) );
     }
 
     // Test with reducers - scalar
@@ -952,7 +960,10 @@ struct TestMDRange_3D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      range_type range( point_type{ { s0, s1, s2 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } );
       TestMDRange_3D functor( N0, N1, N2 );
 
       parallel_for( range, functor );
@@ -961,9 +972,9 @@ struct TestMDRange_3D {
       Kokkos::deep_copy( h_view, functor.input_view );
 
       int counter = 0;
-      for ( int i = 0; i < N0; ++i )
-      for ( int j = 0; j < N1; ++j )
-      for ( int k = 0; k < N2; ++k )
+      for ( int i = s0; i < N0; ++i )
+      for ( int j = s1; j < N1; ++j )
+      for ( int k = s2; k < N2; ++k )
       {
         if ( h_view( i, j, k ) != 3 ) {
           ++counter;
@@ -971,7 +982,7 @@ struct TestMDRange_3D {
       }
 
       if ( counter != 0 ) {
-        printf( "Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter );
+        printf( "Offset Start + Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter );
       }
 
       ASSERT_EQ( counter, 0 );
@@ -1207,7 +1218,11 @@ struct TestMDRange_4D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 3, 3, 3 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 3, 3, 3 } } );
 
       TestMDRange_4D functor( N0, N1, N2, N3 );
 
@@ -1215,7 +1230,7 @@ struct TestMDRange_4D {
       double sum = 0.0;
       parallel_reduce( range, functor, sum );
 
-      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 );
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) );
     }
 
     // Test with reducers - scalar
@@ -1415,7 +1430,11 @@ struct TestMDRange_4D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } );
       TestMDRange_4D functor( N0, N1, N2, N3 );
 
       parallel_for( range, functor );
@@ -1424,10 +1443,10 @@ struct TestMDRange_4D {
       Kokkos::deep_copy( h_view, functor.input_view );
 
       int counter = 0;
-      for ( int i = 0; i < N0; ++i )
-      for ( int j = 0; j < N1; ++j )
-      for ( int k = 0; k < N2; ++k )
-      for ( int l = 0; l < N3; ++l )
+      for ( int i = s0; i < N0; ++i )
+      for ( int j = s1; j < N1; ++j )
+      for ( int k = s2; k < N2; ++k )
+      for ( int l = s3; l < N3; ++l )
       {
         if ( h_view( i, j, k, l ) != 3 ) {
           ++counter;
@@ -1435,7 +1454,7 @@ struct TestMDRange_4D {
       }
 
       if ( counter != 0 ) {
-        printf("Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter);
+        printf("Offset Start + Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter);
       }
 
       ASSERT_EQ( counter, 0 );
@@ -1682,7 +1701,12 @@ struct TestMDRange_5D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 3 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      int s4 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3, s4 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 3 } } );
 
       TestMDRange_5D functor( N0, N1, N2, N3, N4 );
 
@@ -1690,7 +1714,7 @@ struct TestMDRange_5D {
       double sum = 0.0;
       parallel_reduce( range, functor, sum );
 
-      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 );
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) * (N4 - s4) );
     }
 
     // Test with reducers - scalar
@@ -1810,7 +1834,12 @@ struct TestMDRange_5D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 5 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      int s4 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3, s4 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 5 } } );
       TestMDRange_5D functor( N0, N1, N2, N3, N4 );
 
       parallel_for( range, functor );
@@ -1819,11 +1848,11 @@ struct TestMDRange_5D {
       Kokkos::deep_copy( h_view, functor.input_view );
 
       int counter = 0;
-      for ( int i = 0; i < N0; ++i )
-      for ( int j = 0; j < N1; ++j )
-      for ( int k = 0; k < N2; ++k )
-      for ( int l = 0; l < N3; ++l )
-      for ( int m = 0; m < N4; ++m )
+      for ( int i = s0; i < N0; ++i )
+      for ( int j = s1; j < N1; ++j )
+      for ( int k = s2; k < N2; ++k )
+      for ( int l = s3; l < N3; ++l )
+      for ( int m = s4; m < N4; ++m )
       {
         if ( h_view( i, j, k, l, m ) != 3 ) {
           ++counter;
@@ -1831,7 +1860,7 @@ struct TestMDRange_5D {
       }
 
       if ( counter != 0 ) {
-        printf( "Defaults + InitTag op(): Errors in test_for5; mismatches = %d\n\n", counter );
+        printf( "Offset Start + Defaults + InitTag op(): Errors in test_for5; mismatches = %d\n\n", counter );
       }
 
       ASSERT_EQ( counter, 0 );
@@ -2084,7 +2113,13 @@ struct TestMDRange_6D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 3, 2 } } );
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      int s4 = 1;
+      int s5 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3, s4, s5 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 3, 2 } } );
 
       TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 );
 
@@ -2092,7 +2127,7 @@ struct TestMDRange_6D {
       double sum = 0.0;
       parallel_reduce( range, functor, sum );
 
-      ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 );
+      ASSERT_EQ( sum, 2 * (N0 - s0) * (N1 - s1) * (N2 - s2) * (N3 - s3) * (N4 - s4) * (N5 - s5) );
     }
 
     // Test with reducers - scalar
@@ -2214,7 +2249,13 @@ struct TestMDRange_6D {
       typedef typename range_type::tile_type tile_type;
       typedef typename range_type::point_type point_type;
 
-      range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging
+      int s0 = 1;
+      int s1 = 1;
+      int s2 = 1;
+      int s3 = 1;
+      int s4 = 1;
+      int s5 = 1;
+      range_type range( point_type{ { s0, s1, s2, s3, s4, s5 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging
       TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 );
 
       parallel_for( range, functor );
@@ -2223,12 +2264,12 @@ struct TestMDRange_6D {
       Kokkos::deep_copy( h_view, functor.input_view );
 
       int counter = 0;
-      for ( int i = 0; i < N0; ++i )
-      for ( int j = 0; j < N1; ++j )
-      for ( int k = 0; k < N2; ++k )
-      for ( int l = 0; l < N3; ++l )
-      for ( int m = 0; m < N4; ++m )
-      for ( int n = 0; n < N5; ++n )
+      for ( int i = s0; i < N0; ++i )
+      for ( int j = s1; j < N1; ++j )
+      for ( int k = s2; k < N2; ++k )
+      for ( int l = s3; l < N3; ++l )
+      for ( int m = s4; m < N4; ++m )
+      for ( int n = s5; n < N5; ++n )
       {
         if ( h_view( i, j, k, l, m, n ) != 3 ) {
           ++counter;
@@ -2236,7 +2277,7 @@ struct TestMDRange_6D {
       }
 
       if ( counter != 0 ) {
-        printf( "Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter );
+        printf( "Offset Start + Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter );
       }
 
       ASSERT_EQ( counter, 0 );
diff --git a/lib/latte/Install.py b/lib/latte/Install.py
index b3e771e4cc7b53bda4ff0082256a6bc5e169c526..37cb5d6b17f135aa7b2c371278d1d78b7e6ec3b7 100644
--- a/lib/latte/Install.py
+++ b/lib/latte/Install.py
@@ -159,13 +159,13 @@ if buildflag or pathflag:
     os.remove("includelink")
   if os.path.isfile("liblink") or os.path.islink("liblink"):
     os.remove("liblink")
-  if os.path.isfile("filelink") or os.path.islink("filelink"):
-    os.remove("filelink")
+  if os.path.isfile("filelink.o") or os.path.islink("filelink.o"):
+    os.remove("filelink.o")
   cmd = 'ln -s "%s/src" includelink' % lattedir
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
   cmd = 'ln -s "%s" liblink' % lattedir
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
-  cmd = 'ln -s "%s/src/latte_c_bind.o" filelink' % lattedir
+  cmd = 'ln -s "%s/src/latte_c_bind.o" filelink.o' % lattedir
   subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
 # copy Makefile.lammps.suffix to Makefile.lammps
diff --git a/lib/latte/Makefile.lammps.gfortran b/lib/latte/Makefile.lammps.gfortran
index 921721552be381ecbb8b46c43657c5b679006448..6aa7782f8ac2a0f7430ef89bc70c1543cd46c351 100644
--- a/lib/latte/Makefile.lammps.gfortran
+++ b/lib/latte/Makefile.lammps.gfortran
@@ -3,5 +3,5 @@
 # GNU Fortran settings
 
 latte_SYSINC = 
-latte_SYSLIB = ../../lib/latte/filelink -llatte -lgfortran -llapack -lblas
+latte_SYSLIB = ../../lib/latte/filelink.o -llatte -lgfortran -llapack -lblas
 latte_SYSPATH = -fopenmp
diff --git a/lib/latte/Makefile.lammps.ifort b/lib/latte/Makefile.lammps.ifort
index 23d2b32fcc713dc6b6fcd24d8b0473649d279377..0491bdd8a5f41065df995def3b3c0105600fdf28 100644
--- a/lib/latte/Makefile.lammps.ifort
+++ b/lib/latte/Makefile.lammps.ifort
@@ -3,7 +3,7 @@
 # Intel ifort settings
 
 latte_SYSINC = 
-latte_SYSLIB = ../../lib/latte/filelink \
+latte_SYSLIB = ../../lib/latte/filelink.o \
                -llatte -lifcore -lsvml -lompstub -limf -lmkl_intel_lp64 \
                -lmkl_intel_thread -lmkl_core -lmkl_intel_thread -lpthread \
                -openmp -O0
diff --git a/python/lammps.py b/python/lammps.py
index 2db657fbae84c17a7afb6af7b10eecebb4bb4ccf..944eaeabf5a982afddfb79cc8a9f4f286c8aa94e 100644
--- a/python/lammps.py
+++ b/python/lammps.py
@@ -861,6 +861,19 @@ class PyLammps(object):
     """ needed for Python2 compatibility, since print is a reserved keyword """
     return self.__getattr__("print")(s)
 
+  def __dir__(self):
+    """ names reported by dir(): a fixed list of LAMMPS command names """
+    return ['angle_coeff', 'angle_style', 'atom_modify', 'atom_style', 'bond_coeff',
+    'bond_style', 'boundary', 'change_box', 'communicate', 'compute', 'create_atoms',
+    'create_box', 'delete_atoms', 'delete_bonds', 'dielectric', 'dihedral_coeff',
+    'dihedral_style', 'dimension', 'dump', 'fix', 'fix_modify', 'group',
+    'improper_coeff', 'improper_style', 'include', 'kspace_modify', 'kspace_style',
+    'lattice', 'mass', 'minimize', 'min_style', 'neighbor', 'neigh_modify', 'newton',
+    'nthreads', 'pair_coeff', 'pair_modify', 'pair_style', 'processors', 'read',
+    'read_data', 'read_restart', 'region', 'replicate', 'reset_timestep', 'restart',
+    'run', 'run_style', 'thermo', 'thermo_modify', 'thermo_style', 'timestep',
+    'undump', 'unfix', 'units', 'variable', 'velocity', 'write_restart']
+
   def __getattr__(self, name):
     def handler(*args, **kwargs):
       cmd_args = [name] + [str(x) for x in args]
diff --git a/src/.gitignore b/src/.gitignore
index 1571065b72e57ce17ef3893a2706230ff2e1038d..8d997760966204a8eb1da712f98df065299c2cc1 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -103,6 +103,15 @@
 /sna.cpp
 /sna.h
 
+/uef_*.cpp
+/uef_*.h
+/compute_*uef.cpp
+/compute_*uef.h
+/dump_*uef.cpp
+/dump_*uef.h
+/fix_*uef.cpp
+/fix_*uef.h
+
 /atom_vec_wavepacket.cpp
 /atom_vec_wavepacket.h
 /fix_nve_awpmd.cpp
@@ -185,6 +194,8 @@
 /bond_fene.h
 /bond_fene_expand.cpp
 /bond_fene_expand.h
+/bond_gromos.cpp
+/bond_gromos.h
 /bond_harmonic.cpp
 /bond_harmonic.h
 /bond_harmonic_shift.cpp
@@ -405,6 +416,8 @@
 /fix_lambdah_calc.h
 /fix_langevin_eff.cpp
 /fix_langevin_eff.h
+/fix_latte.cpp
+/fix_latte.h
 /fix_lb_fluid.cpp
 /fix_lb_fluid.h
 /fix_lb_momentum.cpp
@@ -503,6 +516,8 @@
 /fix_reaxc_bonds.h
 /fix_reaxc_species.cpp
 /fix_reaxc_species.h
+/fix_rhok.cpp
+/fix_rhok.h
 /fix_rigid.cpp
 /fix_rigid.h
 /fix_rigid_nh.cpp
diff --git a/src/CORESHELL/Install.sh b/src/CORESHELL/Install.sh
index 7c0b7a02a23c0c5817eef8b1c2764f0dc98dd295..93c8fe8132c2bcdc59252719e3d7d42766583d3d 100644
--- a/src/CORESHELL/Install.sh
+++ b/src/CORESHELL/Install.sh
@@ -30,13 +30,19 @@ action () {
 
 action compute_temp_cs.cpp
 action compute_temp_cs.h
-action pair_born_coul_long_cs.cpp pair_born_coul_long.cpp
-action pair_born_coul_dsf_cs.cpp pair_born_coul_dsf.cpp
-action pair_buck_coul_long_cs.cpp pair_buck_coul_long.cpp
-action pair_born_coul_long_cs.h pair_born_coul_long.h
-action pair_born_coul_dsf_cs.h pair_born_coul_dsf.h
-action pair_buck_coul_long_cs.h pair_buck_coul_long.h
-action pair_coul_long_cs.cpp pair_coul_long.cpp
-action pair_coul_long_cs.h pair_coul_long.h
+
+action pair_born_coul_long_cs.cpp   pair_born_coul_long.cpp
+action pair_born_coul_dsf_cs.cpp    pair_born_coul_dsf.cpp
+action pair_buck_coul_long_cs.cpp   pair_buck_coul_long.cpp
+action pair_born_coul_long_cs.h     pair_born_coul_long.h
+action pair_born_coul_dsf_cs.h      pair_born_coul_dsf.h
+action pair_buck_coul_long_cs.h     pair_buck_coul_long.h
+action pair_coul_long_cs.cpp        pair_coul_long.cpp
+action pair_coul_long_cs.h          pair_coul_long.h
 action pair_lj_cut_coul_long_cs.cpp pair_lj_cut_coul_long.cpp
-action pair_lj_cut_coul_long_cs.h pair_lj_cut_coul_long.h
+action pair_lj_cut_coul_long_cs.h   pair_lj_cut_coul_long.h
+
+action pair_born_coul_wolf_cs.cpp   pair_born_coul_wolf.cpp
+action pair_born_coul_wolf_cs.h     pair_born_coul_wolf.h
+action pair_coul_wolf_cs.cpp        pair_coul_wolf.cpp
+action pair_coul_wolf_cs.h          pair_coul_wolf.h
diff --git a/src/CORESHELL/pair_born_coul_wolf_cs.cpp b/src/CORESHELL/pair_born_coul_wolf_cs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3c98f5bf770cc2aff2881480fb65ab2dfd18f41f
--- /dev/null
+++ b/src/CORESHELL/pair_born_coul_wolf_cs.cpp
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pair_born_coul_wolf_cs.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define EPSILON 1.0e-20
+
+/* ---------------------------------------------------------------------- */
+
+PairBornCoulWolfCS::PairBornCoulWolfCS(LAMMPS *lmp) : PairBornCoulWolf(lmp)
+{
+  single_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulWolfCS::compute(int eflag, int vflag)
+{
+  int i,j,ii,jj,inum,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
+  double prefactor;
+  double r,rexp;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
+
+  evdwl = ecoul = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  int newton_pair = force->newton_pair;
+  double qqrd2e = force->qqrd2e;
+
+  // self and shifted coulombic energy
+
+  e_self = v_sh = 0.0;
+  e_shift = erfc(alf*cut_coul)/cut_coul;
+  f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / cut_coul;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    qisq = qtmp*qtmp;
+    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
+    if (eflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+		rsq += EPSILON;
+		// Add EPSILON so that r = 0 does not divide by zero; that interaction
+		// must be removed by a special bond
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_coulsq) {
+          r = sqrt(rsq);
+          prefactor = qqrd2e*qtmp*q[j]/r;
+          erfcc = erfc(alf*r);
+          erfcd = exp(-alf*alf*r*r);
+          v_sh = (erfcc - e_shift*r) * prefactor;
+          dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+          forcecoul = dvdrr*rsq*prefactor;
+          if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+        } else forcecoul = 0.0;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r6inv = r2inv*r2inv*r2inv;
+          r = sqrt(rsq);
+          rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+          forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+            + born3[itype][jtype]*r2inv*r6inv;
+        } else forceborn = 0.0;
+
+        fpair = (forcecoul + factor_lj*forceborn) * r2inv;
+
+        f[i][0] += delx*fpair;
+        f[i][1] += dely*fpair;
+        f[i][2] += delz*fpair;
+        if (newton_pair || j < nlocal) {
+          f[j][0] -= delx*fpair;
+          f[j][1] -= dely*fpair;
+          f[j][2] -= delz*fpair;
+        }
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            ecoul = v_sh;
+            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+          } else ecoul = 0.0;
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
+              d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else evdwl = 0.0;
+        }
+
+        if (evflag) ev_tally(i,j,nlocal,newton_pair,
+                             evdwl,ecoul,fpair,delx,dely,delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/*
+Using erfc and expmsq provided by math_special.h
+
+See: http://lammps.sandia.gov/threads/msg61934.html
+*/
diff --git a/src/CORESHELL/pair_born_coul_wolf_cs.h b/src/CORESHELL/pair_born_coul_wolf_cs.h
new file mode 100644
index 0000000000000000000000000000000000000000..00bbd5874c2694ca54b8ec0b683aef3fc2205594
--- /dev/null
+++ b/src/CORESHELL/pair_born_coul_wolf_cs.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/coul/wolf/cs,PairBornCoulWolfCS)
+
+#else
+
+#ifndef LMP_PAIR_BORN_COUL_WOLF_CS_H
+#define LMP_PAIR_BORN_COUL_WOLF_CS_H
+
+#include "pair_born_coul_wolf.h"
+
+namespace LAMMPS_NS {
+// Pair style born/coul/wolf/cs: derives from PairBornCoulWolf, declaring only a constructor and compute().
+class PairBornCoulWolfCS : public PairBornCoulWolf {
+ public:
+  PairBornCoulWolfCS(class LAMMPS *);
+  virtual void compute(int, int);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+E: All pair coeffs are not set
+
+All pair coefficients must be set in the data file or by the
+pair_coeff command before running a simulation.
+
+E: Pair style born/coul/wolf/cs requires atom attribute q
+
+An atom style that defines this attribute must be used.
+
+
+*/
diff --git a/src/CORESHELL/pair_coul_wolf_cs.cpp b/src/CORESHELL/pair_coul_wolf_cs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..15de7c677733fb1a89be6884aa879fee685ba2a1
--- /dev/null
+++ b/src/CORESHELL/pair_coul_wolf_cs.cpp
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pair_coul_wolf_cs.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define EPSILON 1.0e-20
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulWolfCS::PairCoulWolfCS(LAMMPS *lmp) : PairCoulWolf(lmp)
+{
+  single_enable = 0;  // NOTE(review): presumably marks single() as unsupported for this style - confirm
+}
+
+/* ---------------------------------------------------------------------- */
+// Accumulate damped, cutoff-shifted Coulomb pair forces into atom->f; tally energy/virial when requested.
+void PairCoulWolfCS::compute(int eflag, int vflag)
+{
+  int i,j,ii,jj,inum,jnum;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,forcecoul,factor_coul;
+  double prefactor;
+  double r;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
+
+  ecoul = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  int newton_pair = force->newton_pair;
+  double qqrd2e = force->qqrd2e;
+
+  // reset self/shifted energies; energy and force shift constants are evaluated at cut_coul
+
+  e_self = v_sh = 0.0;
+  e_shift = erfc(alf*cut_coul)/cut_coul;
+  f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
+    cut_coul;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over the inum atoms listed in ilist and, for each, over its jnum neighbors
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    // per-atom self term: passed to ev_tally as Coulomb energy with zero force and displacement
+    qisq = qtmp*qtmp;
+    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
+    if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cut_coulsq) {
+	// add EPSILON so the divisions by r and rsq below stay finite when r = 0;
+	// such an interaction must be removed by special bond
+	rsq += EPSILON;
+	r = sqrt(rsq);
+	prefactor = qqrd2e*qtmp*q[j]/r;
+	erfcc = erfc(alf*r);
+	erfcd = exp(-alf*alf*r*r);
+	v_sh = (erfcc - e_shift*r) * prefactor;
+	dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+	forcecoul = dvdrr*rsq*prefactor;
+	if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	fpair = forcecoul / rsq;
+
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (newton_pair || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (eflag) {
+	  ecoul = v_sh;
+	  if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	} else ecoul = 0.0;
+
+	if (evflag) ev_tally(i,j,nlocal,newton_pair,
+			     0.0,ecoul,fpair,delx,dely,delz);
+      }
+    }
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* NOTES
+Using erfc and expmsq provided by math_special.h
+NOTE(review): compute() above calls exp() directly, not expmsq - confirm which is intended.
+See: http://lammps.sandia.gov/threads/msg61934.html
+*/
diff --git a/src/CORESHELL/pair_coul_wolf_cs.h b/src/CORESHELL/pair_coul_wolf_cs.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d98b1bbc6d44cbb837f8f68e4eb7fc2f04e74c4
--- /dev/null
+++ b/src/CORESHELL/pair_coul_wolf_cs.h
@@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/wolf/cs,PairCoulWolfCS)
+
+#else
+
+#ifndef LMP_PAIR_COUL_WOLF_CS_H_
+#define LMP_PAIR_COUL_WOLF_CS_H_
+
+#include "pair_coul_wolf.h"
+
+namespace LAMMPS_NS {
+// Pair style coul/wolf/cs: derives from PairCoulWolf, declaring only a constructor and compute().
+class PairCoulWolfCS : public PairCoulWolf {
+ public:
+  PairCoulWolfCS( class LAMMPS *);
+  virtual void compute( int, int);
+};
+
+}
+
+#endif /* LMP_PAIR_COUL_WOLF_CS_H_ */
+#endif /* PAIR_CLASS */
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+E: Pair coul/wolf/cs requires atom attribute q
+
+The atom style defined does not have this attribute.
+
+*/
diff --git a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
index e2ffda148f1627d6726adc08c58e31b57f3041db..d418cf20afe44020db9d05c49b1c1eb4c985af64 100644
--- a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
@@ -225,10 +225,10 @@ void PairLJCutCoulLongCS::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -311,10 +311,10 @@ void PairLJCutCoulLongCS::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -412,10 +412,10 @@ void PairLJCutCoulLongCS::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp
index b833b250d4a644849a3875ba1d9a74f8e403540a..c9b2b3f4afe5dba0aba0bf5f7ebcad1b433e7d1d 100644
--- a/src/DIPOLE/pair_lj_long_dipole_long.cpp
+++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp
@@ -263,22 +263,6 @@ void PairLJLongDipoleLong::init_style()
   if (force->kspace) g_ewald = force->kspace->g_ewald;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJLongDipoleLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-
-  if (id)
-    error->all(FLERR,"Pair style lj/long/dipole/long does not currently support respa");
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/DIPOLE/pair_lj_long_dipole_long.h b/src/DIPOLE/pair_lj_long_dipole_long.h
index f9fa10af11bcd079df4a1bb9b5677f6c555efca5..2ace9ca30163daa2c6c159e0b47ef8c165e8e708 100644
--- a/src/DIPOLE/pair_lj_long_dipole_long.h
+++ b/src/DIPOLE/pair_lj_long_dipole_long.h
@@ -34,7 +34,6 @@ class PairLJLongDipoleLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/Depend.sh b/src/Depend.sh
index 94636079603b3c5c2f5543be735eb8c7a3eab97c..e1c812ebc26f740d98cc3060ec2636ea31ab103b 100644
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -119,6 +119,10 @@ if (test $1 = "USER-DPD") then
   depend KOKKOS
 fi
 
+if (test $1 = "USER-DRUDE") then
+  depend USER-OMP
+fi
+
 if (test $1 = "USER-FEP") then
   depend USER-OMP
 fi
diff --git a/src/GRANULAR/pair_gran_hertz_history.cpp b/src/GRANULAR/pair_gran_hertz_history.cpp
index e52aac10dbbb92ef15a8572115cd21756f35c9f9..9723531625fdf44b81fa727538a394f9f24a157e 100644
--- a/src/GRANULAR/pair_gran_hertz_history.cpp
+++ b/src/GRANULAR/pair_gran_hertz_history.cpp
@@ -24,6 +24,7 @@
 #include "update.h"
 #include "force.h"
 #include "fix.h"
+#include "fix_neigh_history.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "comm.h"
@@ -95,8 +96,8 @@ void PairGranHertzHistory::compute(int eflag, int vflag)
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = list->listhistory->firstneigh;
-  firstshear = list->listhistory->firstdouble;
+  firsttouch = fix_history->firstflag;
+  firstshear = fix_history->firstvalue;
 
   // loop over neighbors of my atoms
 
@@ -407,7 +408,7 @@ double PairGranHertzHistory::single(int i, int j, int itype, int jtype,
 
   int jnum = list->numneigh[i];
   int *jlist = list->firstneigh[i];
-  double *allshear = list->listhistory->firstdouble[i];
+  double *allshear = fix_history->firstvalue[i];
 
   for (int jj = 0; jj < jnum; jj++) {
     neighprev++;
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index e9662c9e7341ab77c6b76b69ac3ac773df501f4f..4f120150ded77ccfdaf5e839f2b5cc68a39ddfd7 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -27,7 +27,7 @@
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
-#include "fix_shear_history.h"
+#include "fix_neigh_history.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
@@ -64,7 +64,7 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
 PairGranHookeHistory::~PairGranHookeHistory()
 {
   delete [] svector;
-  if (fix_history) modify->delete_fix("SHEAR_HISTORY");
+  if (fix_history) modify->delete_fix("NEIGH_HISTORY");
 
   if (allocated) {
     memory->destroy(setflag);
@@ -137,8 +137,8 @@ void PairGranHookeHistory::compute(int eflag, int vflag)
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = listhistory->firstneigh;
-  firstshear = listhistory->firstdouble;
+  firsttouch = fix_history->firstflag;
+  firstshear = fix_history->firstvalue;
 
   // loop over neighbors of my atoms
 
@@ -400,35 +400,28 @@ void PairGranHookeHistory::init_style()
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair granular requires ghost atoms store velocity");
 
-  // need a granular neigh list and optionally a granular history neigh list
+  // need a granular neigh list
 
   int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->size = 1;
-  if (history) {
-    irequest = neighbor->request(this,instance_me);
-    neighbor->requests[irequest]->id = 1;
-    neighbor->requests[irequest]->history = 1;
-    neighbor->requests[irequest]->dnum = 3;
-  }
+  if (history) neighbor->requests[irequest]->history = 1;
 
   dt = update->dt;
 
-  // if shear history is stored:
   // if first init, create Fix needed for storing shear history
 
   if (history && fix_history == NULL) {
     char dnumstr[16];
     sprintf(dnumstr,"%d",3);
     char **fixarg = new char*[4];
-    fixarg[0] = (char *) "SHEAR_HISTORY";
+    fixarg[0] = (char *) "NEIGH_HISTORY";
     fixarg[1] = (char *) "all";
-    fixarg[2] = (char *) "SHEAR_HISTORY";
+    fixarg[2] = (char *) "NEIGH_HISTORY";
     fixarg[3] = dnumstr;
-    modify->add_fix(4,fixarg);
+    modify->add_fix(4,fixarg,1);
     delete [] fixarg;
-    fix_history = (FixShearHistory *) modify->fix[modify->nfix-1];
+    fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1];
     fix_history->pair = this;
-    neighbor->requests[irequest]->fix_history = fix_history;
   }
 
   // check for FixFreeze and set freeze_group_bit
@@ -494,23 +487,12 @@ void PairGranHookeHistory::init_style()
   // set fix which stores history info
 
   if (history) {
-    int ifix = modify->find_fix("SHEAR_HISTORY");
-    if (ifix < 0) error->all(FLERR,"Could not find pair fix ID");
-    fix_history = (FixShearHistory *) modify->fix[ifix];
+    int ifix = modify->find_fix("NEIGH_HISTORY");
+    if (ifix < 0) error->all(FLERR,"Could not find pair fix neigh history ID");
+    fix_history = (FixNeighHistory *) modify->fix[ifix];
   }
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   optional granular history list
-------------------------------------------------------------------------- */
-
-void PairGranHookeHistory::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listhistory = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
@@ -704,7 +686,7 @@ double PairGranHookeHistory::single(int i, int j, int itype, int jtype,
 
   int jnum = list->numneigh[i];
   int *jlist = list->firstneigh[i];
-  double *allshear = list->listhistory->firstdouble[i];
+  double *allshear = fix_history->firstvalue[i];
 
   for (int jj = 0; jj < jnum; jj++) {
     neighprev++;
@@ -797,14 +779,3 @@ double PairGranHookeHistory::memory_usage()
   double bytes = nmax * sizeof(double);
   return bytes;
 }
-
-/* ----------------------------------------------------------------------
-   return ptr to FixShearHistory class
-   called by Neighbor when setting up neighbor lists
-------------------------------------------------------------------------- */
-
-void *PairGranHookeHistory::extract(const char *str, int &dim)
-{
-  if (strcmp(str,"history") == 0) return (void *) fix_history;
-  return NULL;
-}
diff --git a/src/GRANULAR/pair_gran_hooke_history.h b/src/GRANULAR/pair_gran_hooke_history.h
index afeab93413c27257f784842857fef631d7fc249c..f02cccd55ef177ba71e022447d49c9c71036b1cd 100644
--- a/src/GRANULAR/pair_gran_hooke_history.h
+++ b/src/GRANULAR/pair_gran_hooke_history.h
@@ -32,7 +32,6 @@ class PairGranHookeHistory : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
@@ -43,7 +42,6 @@ class PairGranHookeHistory : public Pair {
   int pack_forward_comm(int, int *, double *, int, int *);
   void unpack_forward_comm(int, int, double *);
   double memory_usage();
-  void *extract(const char *, int &);
 
  protected:
   double kn,kt,gamman,gammat,xmu;
@@ -56,7 +54,7 @@ class PairGranHookeHistory : public Pair {
   double *onerad_dynamic,*onerad_frozen;
   double *maxrad_dynamic,*maxrad_frozen;
 
-  class FixShearHistory *fix_history;
+  class FixNeighHistory *fix_history;
 
   // storage of rigid body masses for use in granular interactions
 
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
index b63dc5fb8c94aad0ac9312fad0268a89d2f4d62d..6c610c8c111409d8c18201d9be23e44a4fe9195c 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -136,450 +136,6 @@ void AtomVecAtomicKokkos::copy(int i, int j, int delflag)
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecAtomicKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecAtomicKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
-        const size_t elements = 3;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
-                                          const DAT::tdual_int_2d &list,
-                                          const int & iswap,
-                                          const DAT::tdual_xfloat_2d &buf,
-                                          const int &pbc_flag,
-                                          const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAtomicKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAtomicKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-	return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecAtomicKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecAtomicKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
-										const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-	return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecAtomicKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecAtomicKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf,
-                             int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0) + dx;
-      buf[m++] = h_x(j,1) + dy;
-      buf[m++] = h_x(j,2) + dz;
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf,
-                                 int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz,dvx,dvy,dvz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = h_v(j,0);
-      buf[m++] = h_v(j,1);
-      buf[m++] = h_v(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    if (!deform_vremap) {
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_v(j,0);
-        buf[m++] = h_v(j,1);
-        buf[m++] = h_v(j,2);
-      }
-    } else {
-      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
-      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
-      dvz = pbc[2]*h_rate[2];
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        if (mask[i] & deform_groupbit) {
-          buf[m++] = h_v(j,0) + dvx;
-          buf[m++] = h_v(j,1) + dvy;
-          buf[m++] = h_v(j,2) + dvz;
-        } else {
-          buf[m++] = h_v(j,0);
-          buf[m++] = h_v(j,1);
-          buf[m++] = h_v(j,2);
-        }
-      }
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_v(i,0) = buf[m++];
-    h_v(i,1) = buf[m++];
-    h_v(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
-{
-  if(n > 0)
-    sync(Host,F_MASK);
-
-  int m = 0;
-  const int last = first + n;
-  for (int i = first; i < last; i++) {
-    buf[m++] = h_f(i,0);
-    buf[m++] = h_f(i,1);
-    buf[m++] = h_f(i,2);
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
-{
-  if(n > 0) {
-    sync(Host,F_MASK);
-    modified(Host,F_MASK);
-  }
-
-  int m = 0;
-  for (int i = 0; i < n; i++) {
-    const int j = list[i];
-    h_f(j,0) += buf[m++];
-    h_f(j,1) += buf[m++];
-    h_f(j,2) += buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecAtomicKokkos_PackBorder {
   typedef DeviceType device_type;
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h
index 5e9a72c2e377c18c9435744fda21f70844d9c4a7..e4d2654e2cb14e6bcd95e812e6204d33ed0c6304 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.h
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.h
@@ -33,12 +33,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
   virtual ~AtomVecAtomicKokkos() {}
   void grow(int);
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
-  int pack_reverse(int, int, double *);
-  void unpack_reverse(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   void unpack_border(int, int, double *);
@@ -55,15 +49,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
   bigint memory_usage();
 
   void grow_reset();
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]);
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf);
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]);
   int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
                          DAT::tdual_xfloat_2d buf,int iswap,
                          int pbc_flag, int *pbc, ExecutionSpace space);
@@ -99,9 +84,6 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
   DAT::t_x_array d_x;
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
-  HAT::t_x_array h_x;
-  HAT::t_v_array h_v;
-  HAT::t_f_array h_f;
 
   DAT::tdual_int_1d k_count;
 };
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp
index e0f29a27bb7f251aa4816e4bd1a19f43f6f2a39b..076144420c1442c5db69008905263313b4757be2 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@@ -178,448 +178,6 @@ void AtomVecBondKokkos::copy(int i, int j, int delflag)
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecBondKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecBondKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
-        const size_t elements = 3;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
-                                        const DAT::tdual_int_2d &list,
-                                        const int & iswap,
-                                        const DAT::tdual_xfloat_2d &buf,
-                                        const int &pbc_flag,
-                                        const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-	return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecBondKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecBondKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
-										const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-	return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecBondKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecBondKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf,
-                                 int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0) + dx;
-      buf[m++] = h_x(j,1) + dy;
-      buf[m++] = h_x(j,2) + dz;
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf,
-                                     int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz,dvx,dvy,dvz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = h_v(j,0);
-      buf[m++] = h_v(j,1);
-      buf[m++] = h_v(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    if (!deform_vremap) {
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_v(j,0);
-        buf[m++] = h_v(j,1);
-        buf[m++] = h_v(j,2);
-      }
-    } else {
-      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
-      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
-      dvz = pbc[2]*h_rate[2];
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        if (mask[i] & deform_groupbit) {
-          buf[m++] = h_v(j,0) + dvx;
-          buf[m++] = h_v(j,1) + dvy;
-          buf[m++] = h_v(j,2) + dvz;
-        } else {
-          buf[m++] = h_v(j,0);
-          buf[m++] = h_v(j,1);
-          buf[m++] = h_v(j,2);
-        }
-      }
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_v(i,0) = buf[m++];
-    h_v(i,1) = buf[m++];
-    h_v(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf)
-{
-  if(n > 0)
-    sync(Host,F_MASK);
-
-  int m = 0;
-  const int last = first + n;
-  for (int i = first; i < last; i++) {
-    buf[m++] = h_f(i,0);
-    buf[m++] = h_f(i,1);
-    buf[m++] = h_f(i,2);
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf)
-{
-  if(n > 0)
-    modified(Host,F_MASK);
-
-  int m = 0;
-  for (int i = 0; i < n; i++) {
-    const int j = list[i];
-    h_f(j,0) += buf[m++];
-    h_f(j,1) += buf[m++];
-    h_f(j,2) += buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecBondKokkos_PackBorder {
   typedef DeviceType device_type;
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h
index 3dcc99fa784bb86a33ef11474e42865a04d2253f..7ec15450efc23c91d9ff897fe09a52e9046e1dd3 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.h
+++ b/src/KOKKOS/atom_vec_bond_kokkos.h
@@ -32,12 +32,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
   virtual ~AtomVecBondKokkos() {}
   void grow(int);
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
-  int pack_reverse(int, int, double *);
-  void unpack_reverse(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
@@ -59,15 +53,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
   bigint memory_usage();
 
   void grow_reset();
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]);
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf);
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]);
   int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
                          DAT::tdual_xfloat_2d buf,int iswap,
                          int pbc_flag, int *pbc, ExecutionSpace space);
@@ -112,9 +97,6 @@ class AtomVecBondKokkos : public AtomVecKokkos {
   DAT::t_x_array d_x;
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
-  HAT::t_x_array h_x;
-  HAT::t_v_array h_v;
-  HAT::t_f_array h_f;
 
   DAT::t_tagint_1d d_molecule;
   DAT::t_int_2d d_nspecial;
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
index 89f7e91c2b18feabc5c5427c5b92092afdfd0ce8..7b8b74b4051b63411f673b537ca4c1b9ce731198 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -199,397 +199,6 @@ struct AtomVecChargeKokkos_PackComm {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
-                                          const DAT::tdual_int_2d &list,
-                                          const int & iswap,
-                                          const DAT::tdual_xfloat_2d &buf,
-                                          const int &pbc_flag,
-                                          const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecChargeKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecChargeKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecChargeKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecChargeKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-	return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecChargeKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecChargeKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
-                                        const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-	return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecChargeKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecChargeKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf,
-                             int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0) + dx;
-      buf[m++] = h_x(j,1) + dy;
-      buf[m++] = h_x(j,2) + dz;
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf,
-                                 int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz,dvx,dvy,dvz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = h_v(j,0);
-      buf[m++] = h_v(j,1);
-      buf[m++] = h_v(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    if (!deform_vremap) {
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_v(j,0);
-        buf[m++] = h_v(j,1);
-        buf[m++] = h_v(j,2);
-      }
-    } else {
-      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
-      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
-      dvz = pbc[2]*h_rate[2];
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        if (mask[i] & deform_groupbit) {
-          buf[m++] = h_v(j,0) + dvx;
-          buf[m++] = h_v(j,1) + dvy;
-          buf[m++] = h_v(j,2) + dvz;
-        } else {
-          buf[m++] = h_v(j,0);
-          buf[m++] = h_v(j,1);
-          buf[m++] = h_v(j,2);
-        }
-      }
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_v(i,0) = buf[m++];
-    h_v(i,1) = buf[m++];
-    h_v(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf)
-{
-  if(n > 0)
-    sync(Host,F_MASK);
-
-  int m = 0;
-  const int last = first + n;
-  for (int i = first; i < last; i++) {
-    buf[m++] = h_f(i,0);
-    buf[m++] = h_f(i,1);
-    buf[m++] = h_f(i,2);
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf)
-{
-  if(n > 0)
-    modified(Host,F_MASK);
-
-  int m = 0;
-  for (int i = 0; i < n; i++) {
-    const int j = list[i];
-    h_f(j,0) += buf[m++];
-    h_f(j,1) += buf[m++];
-    h_f(j,2) += buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecChargeKokkos_PackBorder {
   typedef DeviceType device_type;
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h
index f9b385e7ed87aaf7557dfb0d0c7bdbbc2febafb2..e9ff70bbe103c7c1df5d99387ee92a8530bcd79b 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.h
+++ b/src/KOKKOS/atom_vec_charge_kokkos.h
@@ -33,12 +33,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
   virtual ~AtomVecChargeKokkos() {}
   void grow(int);
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
-  int pack_reverse(int, int, double *);
-  void unpack_reverse(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
@@ -60,15 +54,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
   bigint memory_usage();
 
   void grow_reset();
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]);
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf);
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]);
   int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
                          DAT::tdual_xfloat_2d buf,int iswap,
                          int pbc_flag, int *pbc, ExecutionSpace space);
@@ -108,9 +93,6 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
   DAT::t_x_array d_x;
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
-  HAT::t_x_array h_x;
-  HAT::t_v_array h_v;
-  HAT::t_f_array h_f;
 
   DAT::t_float_1d d_q;
 
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h
index 372404cc7d9f239e8b23516f091a67011e7e59bd..cec1b82357eab95cf9a34595b5f398793cb69f4d 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.h
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.h
@@ -111,9 +111,6 @@ class AtomVecDPDKokkos : public AtomVecKokkos {
   DAT::t_x_array d_x;
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
-  HAT::t_x_array h_x;
-  HAT::t_v_array h_v;
-  HAT::t_f_array h_f;
 
   DAT::tdual_int_1d k_count;
 };
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
index fd7eaf7c81c9c6814b606976fc4f06def58f9339..8e9abe40675f7816ca969a23c0ab55855f71c5d1 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -307,452 +307,6 @@ void AtomVecFullKokkos::copy(int i, int j, int delflag)
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecFullKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecFullKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().dimension_0()
-				*buf.view<DeviceType>().dimension_1())/3;
-        const size_t elements = 3;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
-                                             const DAT::tdual_int_2d &list,
-                                             const int & iswap,
-                                             const DAT::tdual_xfloat_2d &buf,
-                                             const int &pbc_flag,
-                                             const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecFullKokkos_PackComm<LMPHostType,1,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecFullKokkos_PackComm<LMPHostType,1,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecFullKokkos_PackComm<LMPHostType,0,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecFullKokkos_PackComm<LMPHostType,0,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-        struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-        struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-	return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecFullKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecFullKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-    _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
-    _list(list.view<DeviceType>()),_iswap(iswap),
-    _xprd(xprd),_yprd(yprd),_zprd(zprd),
-    _xy(xy),_xz(xz),_yz(yz) {
-    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                                           const int & iswap,
-                                           const int nfirst, const int &pbc_flag,
-                                           const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
-      struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if(domain->triclinic) {
-      struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-	return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecFullKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecFullKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
-    sync(Host,X_MASK);
-    modified(Host,X_MASK);
-    struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    sync(Device,X_MASK);
-    modified(Device,X_MASK);
-    struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf,
-                                      int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0) + dx;
-      buf[m++] = h_x(j,1) + dy;
-      buf[m++] = h_x(j,2) + dz;
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf,
-                                          int pbc_flag, int *pbc)
-{
-  int i,j,m;
-  double dx,dy,dz,dvx,dvy,dvz;
-
-  m = 0;
-  if (pbc_flag == 0) {
-    for (i = 0; i < n; i++) {
-      j = list[i];
-      buf[m++] = h_x(j,0);
-      buf[m++] = h_x(j,1);
-      buf[m++] = h_x(j,2);
-      buf[m++] = h_v(j,0);
-      buf[m++] = h_v(j,1);
-      buf[m++] = h_v(j,2);
-    }
-  } else {
-    if (domain->triclinic == 0) {
-      dx = pbc[0]*domain->xprd;
-      dy = pbc[1]*domain->yprd;
-      dz = pbc[2]*domain->zprd;
-    } else {
-      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
-      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
-      dz = pbc[2]*domain->zprd;
-    }
-    if (!deform_vremap) {
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_v(j,0);
-        buf[m++] = h_v(j,1);
-        buf[m++] = h_v(j,2);
-      }
-    } else {
-      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
-      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
-      dvz = pbc[2]*h_rate[2];
-      for (i = 0; i < n; i++) {
-        j = list[i];
-        buf[m++] = h_x(j,0) + dx;
-        buf[m++] = h_x(j,1) + dy;
-        buf[m++] = h_x(j,2) + dz;
-        if (mask[i] & deform_groupbit) {
-          buf[m++] = h_v(j,0) + dvx;
-          buf[m++] = h_v(j,1) + dvy;
-          buf[m++] = h_v(j,2) + dvz;
-        } else {
-          buf[m++] = h_v(j,0);
-          buf[m++] = h_v(j,1);
-          buf[m++] = h_v(j,2);
-        }
-      }
-    }
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf)
-{
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    h_x(i,0) = buf[m++];
-    h_x(i,1) = buf[m++];
-    h_x(i,2) = buf[m++];
-    h_v(i,0) = buf[m++];
-    h_v(i,1) = buf[m++];
-    h_v(i,2) = buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf)
-{
-  if(n > 0)
-    sync(Host,F_MASK);
-
-  int m = 0;
-  const int last = first + n;
-  for (int i = first; i < last; i++) {
-    buf[m++] = h_f(i,0);
-    buf[m++] = h_f(i,1);
-    buf[m++] = h_f(i,2);
-  }
-  return m;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf)
-{
-  if(n > 0)
-    modified(Host,F_MASK);
-
-  int m = 0;
-  for (int i = 0; i < n; i++) {
-    const int j = list[i];
-    h_f(j,0) += buf[m++];
-    h_f(j,1) += buf[m++];
-    h_f(j,2) += buf[m++];
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecFullKokkos_PackBorder {
   typedef DeviceType device_type;
diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h
index 760df087e14105a3330b4a5301a1ddc09f9900af..33760a8b5fc88fcfab56e892f9c2848cd228043b 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.h
+++ b/src/KOKKOS/atom_vec_full_kokkos.h
@@ -32,12 +32,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
   virtual ~AtomVecFullKokkos() {}
   void grow(int);
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
-  int pack_reverse(int, int, double *);
-  void unpack_reverse(int, int *, double *);
   int pack_border(int, int *, double *, int, int *);
   int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
@@ -59,15 +53,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
   bigint memory_usage();
 
   void grow_reset();
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]);
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf);
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]);
   int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
                          DAT::tdual_xfloat_2d buf,int iswap,
                          int pbc_flag, int *pbc, ExecutionSpace space);
@@ -125,9 +110,6 @@ class AtomVecFullKokkos : public AtomVecKokkos {
   DAT::t_x_array d_x;
   DAT::t_v_array d_v;
   DAT::t_f_array d_f;
-  HAT::t_x_array h_x;
-  HAT::t_v_array h_v;
-  HAT::t_f_array h_f;
 
   DAT::t_float_1d d_q;
   HAT::t_float_1d h_q;
diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
index 5542991395157470204cf82b701c92c3908368d0..03fb2a4ead549b09096705cefb7f929b69bc2cce 100644
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -12,6 +12,10 @@
 ------------------------------------------------------------------------- */
 
 #include "atom_vec_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "atom_masks.h"
 
 using namespace LAMMPS_NS;
 
@@ -24,3 +28,585 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
   buffer_size = 0;
 }
 
+/* ---------------------------------------------------------------------- */
+// Kernel functor: for index i, copies x of atom _list(_iswap,i) into row i of _buf (shifted when PBC_FLAG != 0).
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // filled from pbc[0..5] in the constructor
+
+  AtomVecKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
+        const size_t elements = 3;  // operator() writes three values (columns 0..2) per row
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);  // NOTE(review): presumably binds _buf to buf's storage as maxsend x 3 - confirm in buffer_view
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);  // source atom index for output row i
+      if (PBC_FLAG == 0) {  // plain copy, no shift
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift each component by _pbc[k] times _xprd/_yprd/_zprd
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // same shift plus the _xy, _xz and _yz terms
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+// Packs x for n send-list entries of swap iswap into buf with a Kokkos kernel; returns n*size_forward.
+int AtomVecKokkos::pack_comm_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_xfloat_2d &buf,
+                                          const int &pbc_flag,
+                                          const int* const pbc)
+{
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+
+  if(commKK->forward_comm_on_host) {  // host path: LMPHostType kernels
+    sync(Host,X_MASK);  // NOTE(review): presumably brings the host copy of x up to date - confirm
+    if(pbc_flag) {  // PBC_FLAG=1 instantiations
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {  // PBC_FLAG=0 instantiations (pbc values are copied but not used by the kernel)
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {  // device path: same four cases with LMPDeviceType
+    sync(Device,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+
+	return n*size_forward;  // NOTE(review): the kernel writes 3 values per atom - confirm size_forward agrees
+}
+
+/* ---------------------------------------------------------------------- */
+// Kernel functor: for index i, copies x of atom _list(_iswap,i) to atom i+_nfirst of the same x array (shifted when PBC_FLAG != 0).
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;  // read-side view of x
+  typename ArrayTypes<DeviceType>::t_x_array _xw;  // write-side view, built from the same x as _x
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // filled from pbc[0..5] in the constructor
+
+  AtomVecKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);  // source atom; destination is i+_nfirst
+      if (PBC_FLAG == 0) {  // plain copy, no shift
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift each component by _pbc[k] times _xprd/_yprd/_zprd
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // same shift plus the _xy, _xz and _yz terms
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+// Copies x of n send-list atoms of swap iswap directly to atoms nfirst..nfirst+n-1 with a Kokkos kernel; returns n*3.
+int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+                                        const int nfirst, const int &pbc_flag, const int* const pbc) {
+  if(commKK->forward_comm_on_host) {  // host path: LMPHostType kernels
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);  // x is both read and written by the kernel below
+    if(pbc_flag) {  // PBC_FLAG=1 instantiations
+      if(domain->triclinic) {
+      struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {  // PBC_FLAG=0 instantiations
+      if(domain->triclinic) {
+      struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {  // device path: same four cases with LMPDeviceType
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+	return n*3;  // three position components per atom
+}
+
+/* ---------------------------------------------------------------------- */
+// Kernel functor: for index i, writes columns 0..2 of row i of _buf into x of atom i+_first.
+template<class DeviceType>
+struct AtomVecKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;  // offset added to the kernel index to get the destination atom
+
+  AtomVecKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+// Copies n rows of buf into x starting at atom index first, using a host or device Kokkos kernel.
+void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {  // host path
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);  // the kernel below writes x
+    struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+  } else {  // device path
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+// Serial pack: writes h_x (3 doubles per atom) for the n atoms in list into buf; returns the number of doubles written.
+int AtomVecKokkos::pack_comm(int n, int *list, double *buf,
+                             int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;  // shift added to every packed position when pbc_flag != 0
+
+  m = 0;
+  if (pbc_flag == 0) {  // plain copy, no shift
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: xy, xz and yz terms are added as well
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+// Serial pack of h_x and h_v (6 doubles per atom) for the n atoms in list into buf; returns the number of doubles written.
+int AtomVecKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;  // position shift and, with deform_vremap, velocity shift
+
+  m = 0;
+  if (pbc_flag == 0) {  // plain copy of x and v
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: xy, xz and yz terms are added as well
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {  // positions shifted, velocities copied unchanged
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {  // velocities of atoms in the deform group also get a shift built from h_rate
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (atom->mask[j] & deform_groupbit) {  // test the packed atom j, not the send-list position i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_comm(int n, int first, double *buf)
+{
+  // copies n packed x,y,z triples from buf into h_x rows first..first+n-1
+  const double *src = buf;
+
+  for (int k = 0; k < n; k++) {
+    const int row = first + k;
+    h_x(row,0) = *src++;
+    h_x(row,1) = *src++;
+    h_x(row,2) = *src++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  // each atom takes six consecutive doubles from buf: three for h_x, then three for h_v
+  const double *src = buf;
+
+  for (int k = 0; k < n; k++) {
+    const int row = first + k;
+    h_x(row,0) = *src++;
+    h_x(row,1) = *src++;
+    h_x(row,2) = *src++;
+    h_v(row,0) = *src++;
+    h_v(row,1) = *src++;
+    h_v(row,2) = *src++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecKokkos_PackReverse { // functor: copies three components per index from f into buf
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d _buf;
+  int _first; // row offset applied when reading _f
+
+  AtomVecKokkos_PackReverse(
+      const typename DAT::tdual_f_array &f,
+      const typename DAT::tdual_ffloat_2d &buf,
+      const int& first):_f(f.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {}; // stores the DeviceType views of both arguments
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const { // _buf row i <- _f row i+_first
+    _buf(i,0) = _f(i+_first,0);
+    _buf(i,1) = _f(i+_first,1);
+    _buf(i,2) = _f(i+_first,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
+    const DAT::tdual_ffloat_2d &buf ) { // packs f rows first..first+n-1 into buf rows 0..n-1
+  if(commKK->reverse_comm_on_host) { // host flag set: run the functor as LMPHostType
+    sync(Host,F_MASK);
+    struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
+    Kokkos::parallel_for(n,f);
+  } else { // otherwise the same steps for LMPDeviceType
+    sync(Device,F_MASK);
+    struct AtomVecKokkos_PackReverse<LMPDeviceType> f(atomKK->k_f,buf,first);
+    Kokkos::parallel_for(n,f);
+  }
+
+  return n*size_reverse; // reported count is n times size_reverse
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecKokkos_UnPackReverseSelf { // functor: adds f rows starting at nfirst onto the listed rows of f
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_f_array_randomread _f; // read side
+  typename ArrayTypes<DeviceType>::t_f_array _fw; // write side; initialised from the same f argument as _f
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap; // row of _list used by this functor
+
+  AtomVecKokkos_UnPackReverseSelf(
+      const typename DAT::tdual_f_array &f,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap):
+      _f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap) {
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const { // row _list(_iswap,i) += row i+_nfirst
+    const int j = _list(_iswap,i);
+    _fw(j,0) += _f(i+_nfirst,0);
+    _fw(j,1) += _f(i+_nfirst,1);
+    _fw(j,2) += _f(i+_nfirst,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+                                        const int nfirst) { // source and destination rows are both in atomKK->k_f
+  if(commKK->reverse_comm_on_host) { // host flag set: run the functor as LMPHostType
+    sync(Host,F_MASK);
+    struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
+    Kokkos::parallel_for(n,f);
+    modified(Host,F_MASK); // flagged after the kernel has been launched
+  } else { // otherwise the same steps for LMPDeviceType
+    sync(Device,F_MASK);
+    struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list,iswap);
+    Kokkos::parallel_for(n,f);
+    modified(Device,F_MASK);
+  }
+  return n*3; // three values per index
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecKokkos_UnPackReverse { // functor: adds buf row i onto f row _list(_iswap,i)
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_f_array _f;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d_const _buf; // not in the initializer list; filled in the constructor body
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+
+  AtomVecKokkos_UnPackReverse(
+      const typename DAT::tdual_f_array &f,
+      const typename DAT::tdual_ffloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap):
+      _f(f.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3; // total entries of buf divided by 3
+        const size_t elements = 3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements); // presumably makes _buf a maxsend x 3 view of buf -- confirm in buffer_view
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const { // row _list(_iswap,i) of _f += row i of _buf
+    const int j = _list(_iswap,i);
+    _f(j,0) += _buf(i,0);
+    _f(j,1) += _buf(i,1);
+    _f(j,2) += _buf(i,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_ffloat_2d &buf)
+{ // launches n iterations of the UnPackReverse functor on atomKK->k_f
+  // Check whether to always run reverse communication on the host
+  // Choose correct reverse UnPackReverse kernel
+
+  if(commKK->reverse_comm_on_host) { // NOTE(review): no sync(...,F_MASK) here, unlike unpack_reverse_self -- confirm callers sync first
+    struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
+    Kokkos::parallel_for(n,f);
+    modified(Host,F_MASK); // flagged after the kernel has been launched
+  } else {
+    struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list,iswap);
+    Kokkos::parallel_for(n,f);
+    modified(Device,F_MASK);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
+{
+  // nothing is packed (and no host sync is issued) unless n is positive
+  if (n <= 0) return 0;
+
+  sync(Host,F_MASK);
+
+  double *dst = buf;
+  for (int k = 0; k < n; k++) {
+    const int row = first + k;
+    for (int d = 0; d < 3; d++) *dst++ = h_f(row,d);
+  }
+
+  return 3*n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // an empty request accumulates nothing, so the host copy is not flagged
+  if (n <= 0) return;
+
+  const double *src = buf;
+  for (int k = 0; k < n; k++) {
+    const int atomid = list[k];
+    for (int d = 0; d < 3; d++) h_f(atomid,d) += *src++;
+  }
+
+  modified(Host,F_MASK);
+}
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index 7f593f235f6736d0dd966c6989feb40632705b8a..20a07ec443690451a963ce14c51ed0f71873ae93 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -35,29 +35,48 @@ class AtomVecKokkos : public AtomVec {
  public:
   AtomVecKokkos(class LAMMPS *);
   virtual ~AtomVecKokkos() {}
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
+  virtual int pack_reverse(int, int, double *);
+  virtual void unpack_reverse(int, int *, double *);
 
   virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
   virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
-  virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {};
+  virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
 
   virtual int
     pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
                    const int & iswap, const int nfirst,
-                   const int &pbc_flag, const int pbc[]) = 0;
-  //{return 0;}
+                   const int &pbc_flag, const int pbc[]);
+
   virtual int
     pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
                      const int & iswap, const DAT::tdual_xfloat_2d &buf,
-                     const int &pbc_flag, const int pbc[]) = 0;
-  //{return 0;}
+                     const int &pbc_flag, const int pbc[]);
+
   virtual void
     unpack_comm_kokkos(const int &n, const int &nfirst,
-                       const DAT::tdual_xfloat_2d &buf) = 0;
+                       const DAT::tdual_xfloat_2d &buf);
+
+  virtual int
+    unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,
+                      const int & iswap, const int nfirst);
+
+  virtual int
+    pack_reverse_kokkos(const int &n, const int &nfirst,
+                        const DAT::tdual_ffloat_2d &buf);
+
+  virtual void
+    unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list,
+                          const int & iswap, const DAT::tdual_ffloat_2d &buf);
+
   virtual int
     pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
                        DAT::tdual_xfloat_2d buf,int iswap,
                        int pbc_flag, int *pbc, ExecutionSpace space) = 0;
-  //{return 0;};
+
   virtual void
     unpack_border_kokkos(const int &n, const int &nfirst,
                          const DAT::tdual_xfloat_2d &buf,
@@ -68,15 +87,19 @@ class AtomVecKokkos : public AtomVec {
                          DAT::tdual_int_1d k_sendlist,
                          DAT::tdual_int_1d k_copylist,
                          ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
-  //{return 0;};
+
   virtual int
     unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
                            int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
                            ExecutionSpace space) = 0;
-  //{return 0;};
+
 
  protected:
 
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
   class CommKokkos *commKK;
   size_t buffer_size;
   void* buffer;
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index f5ed0f525fee6aeba2642e93c32b5fb5917cbc80..5534341342d5017378169609dce1a0c5964d8338 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
   if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
   memory->sfree(sendlist);
   sendlist = NULL;
-  k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d();
+  k_sendlist = DAT::tdual_int_2d();
+  k_total_send = DAT::tdual_int_scalar("comm::k_total_send");
 
   // error check for disallow of OpenMP threads?
 
@@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
   memory->destroy(buf_recv);
   buf_recv = NULL;
 
-  k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
+  k_exchange_sendlist = DAT::
     tdual_int_1d("comm:k_exchange_sendlist",100);
-  k_exchange_copylist = ArrayTypes<LMPDeviceType>::
+  k_exchange_copylist = DAT::
     tdual_int_1d("comm:k_exchange_copylist",100);
-  k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1);
-  k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100);
+  k_count = DAT::tdual_int_scalar("comm:k_count");
+  k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
 
   memory->destroy(maxsendlist);
   maxsendlist = NULL;
@@ -102,8 +103,10 @@ void CommKokkos::init()
   atomKK = (AtomKokkos *) atom;
   exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
   forward_comm_classic = lmp->kokkos->forward_comm_classic;
+  reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
   exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
   forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
+  reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host;
 
   CommBrick::init();
 
@@ -132,8 +135,11 @@ void CommKokkos::init()
   if (force->newton == 0) check_reverse = 0;
   if (force->pair) check_reverse += force->pair->comm_reverse_off;
 
-  if(check_reverse || check_forward)
+  if (ghost_velocity)
     forward_comm_classic = true;
+
+  if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
+    reverse_comm_classic = true;
 }
 
 /* ----------------------------------------------------------------------
@@ -173,7 +179,6 @@ void CommKokkos::forward_comm_device(int dummy)
   int n;
   MPI_Request request;
   AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
-  double **x = atom->x;
   double *buf;
 
   // exchange data with another proc
@@ -181,32 +186,29 @@ void CommKokkos::forward_comm_device(int dummy)
   // if comm_x_only set, exchange or copy directly to x, don't unpack
 
   k_sendlist.sync<DeviceType>();
+  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
 
   for (int iswap = 0; iswap < nswap; iswap++) {
-
     if (sendproc[iswap] != me) {
       if (comm_x_only) {
-        atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
-        if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
-        else buf = NULL;
-
         if (size_forward_recv[iswap]) {
             buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
               firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
             MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
-                    recvproc[iswap],0,world,&request);
+                      recvproc[iswap],0,world,&request);
         }
         n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                    iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
-
         if (n) {
           MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
                    n,MPI_DOUBLE,sendproc[iswap],0,world);
         }
 
-        if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
-        atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
-                         space,X_MASK);
+        if (size_forward_recv[iswap]) {
+          MPI_Wait(&request,MPI_STATUS_IGNORE);
+          atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
+                           space,X_MASK);
+        }
       } else if (ghost_velocity) {
         error->all(FLERR,"Ghost velocity forward comm not yet "
                    "implemented with Kokkos");
@@ -248,21 +250,93 @@ void CommKokkos::forward_comm_device(int dummy)
     }
   }
 }
+
+/* ----------------------------------------------------------------------
+   reverse communication of forces on atoms every timestep
+   other per-atom attributes may also be sent via pack/unpack routines
+------------------------------------------------------------------------- */
+
 void CommKokkos::reverse_comm()
 {
+  if (!reverse_comm_classic) { // non-classic path: dispatch to reverse_comm_device and skip the code below
+    if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
+    else reverse_comm_device<LMPDeviceType>();
+    return;
+  }
+
   k_sendlist.sync<LMPHostType>();
+
   if (comm_f_only)
     atomKK->sync(Host,F_MASK);
   else
     atomKK->sync(Host,ALL_MASK);
+
   CommBrick::reverse_comm();
+
   if (comm_f_only)
     atomKK->modified(Host,F_MASK);
   else
     atomKK->modified(Host,ALL_MASK);
-  atomKK->sync(Device,ALL_MASK);
+
+  //atomKK->sync(Device,ALL_MASK); // is this needed?
+}
+
+template<class DeviceType>
+void CommKokkos::reverse_comm_device()
+{
+  int n;
+  MPI_Request request;
+  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
+  double *buf;
+  
+  // exchange data with another proc
+  // if other proc is self, just copy
+  // if comm_f_only set, exchange or copy directly from f, don't pack
+
+  k_sendlist.sync<DeviceType>();
+  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
+
+  for (int iswap = nswap-1; iswap >= 0; iswap--) { // swaps are walked from last to first
+    if (sendproc[iswap] != me) {
+      if (comm_f_only) {
+        if (size_reverse_recv[iswap]) // receive is posted before the blocking send below
+            MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE,
+                    sendproc[iswap],0,world,&request);
+        if (size_reverse_send[iswap]) {
+          buf = atomKK->k_f.view<DeviceType>().ptr_on_device() + // base pointer of f advanced by firstrecv[iswap]*dimension_1 elements
+            firstrecv[iswap]*atomKK->k_f.view<DeviceType>().dimension_1();
+  
+          MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
+                   recvproc[iswap],0,world);
+        }
+        if (size_reverse_recv[iswap]) {
+          MPI_Wait(&request,MPI_STATUS_IGNORE);
+          atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
+                           space,F_MASK);
+        }
+      } else { // not comm_f_only: pack into k_buf_send through the atom style before sending
+        if (size_reverse_recv[iswap])
+          MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
+                    size_reverse_recv[iswap],MPI_DOUBLE,
+                    sendproc[iswap],0,world,&request);
+        n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
+        if (n)
+          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
+                   MPI_DOUBLE,recvproc[iswap],0,world);
+        if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
+      }
+      avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap, // runs after either branch above
+                                k_buf_recv);
+    } else { // swap partner is this proc: no MPI calls
+      if (sendnum[iswap])
+        n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap, // n is assigned but not read again in this function
+                                 firstrecv[iswap]);
+    }
+  }
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::forward_comm_fix(Fix *fix, int size)
 {
   k_sendlist.sync<LMPHostType>();
@@ -408,7 +482,7 @@ struct BuildExchangeListFunctor {
   typename AT::t_x_array _x;
 
   int _nlocal,_dim;
-  typename AT::t_int_1d _nsend;
+  typename AT::t_int_scalar _nsend;
   typename AT::t_int_1d _sendlist;
   typename AT::t_int_1d _sendflag;
 
@@ -416,7 +490,7 @@ struct BuildExchangeListFunctor {
   BuildExchangeListFunctor(
       const typename AT::tdual_x_array x,
       const typename AT::tdual_int_1d sendlist,
-      typename AT::tdual_int_1d nsend,
+      typename AT::tdual_int_scalar nsend,
       typename AT::tdual_int_1d sendflag,int nlocal, int dim,
                 X_FLOAT lo, X_FLOAT hi):
                 _x(x.template view<DeviceType>()),
@@ -430,7 +504,7 @@ struct BuildExchangeListFunctor {
   KOKKOS_INLINE_FUNCTION
   void operator() (int i) const {
     if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
-      const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1);
+      const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
       if(mysend<_sendlist.dimension_0()) {
         _sendlist(mysend) = i;
         _sendflag(i) = 1;
@@ -489,9 +563,9 @@ void CommKokkos::exchange_device()
     if (true) {
       if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
       k_sendflag.sync<DeviceType>();
-      k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
-      while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
-        k_count.h_view(0) = 0;
+      k_count.h_view() = k_exchange_sendlist.h_view.dimension_0();
+      while (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
+        k_count.h_view() = 0;
         k_count.modify<LMPHostType>();
         k_count.sync<DeviceType>();
 
@@ -504,10 +578,10 @@ void CommKokkos::exchange_device()
         k_count.modify<DeviceType>();
 
         k_count.sync<LMPHostType>();
-        if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
-          k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
-          k_exchange_copylist.resize(k_count.h_view(0)*1.1);
-          k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
+        if (k_count.h_view()>=k_exchange_sendlist.h_view.dimension_0()) {
+          k_exchange_sendlist.resize(k_count.h_view()*1.1);
+          k_exchange_copylist.resize(k_count.h_view()*1.1);
+          k_count.h_view()=k_exchange_sendlist.h_view.dimension_0();
         }
       }
       k_exchange_copylist.sync<LMPHostType>();
@@ -515,22 +589,22 @@ void CommKokkos::exchange_device()
       k_sendflag.sync<LMPHostType>();
 
       int sendpos = nlocal-1;
-      nlocal -= k_count.h_view(0);
-      for(int i = 0; i < k_count.h_view(0); i++) {
+      nlocal -= k_count.h_view();
+      for(int i = 0; i < k_count.h_view(); i++) {
         if (k_exchange_sendlist.h_view(i)<nlocal) {
           while (k_sendflag.h_view(sendpos)) sendpos--;
           k_exchange_copylist.h_view(i) = sendpos;
           sendpos--;
         } else
-        k_exchange_copylist.h_view(i) = -1;
+          k_exchange_copylist.h_view(i) = -1;
       }
 
       k_exchange_copylist.modify<LMPHostType>();
       k_exchange_copylist.sync<DeviceType>();
-      nsend = k_count.h_view(0);
+      nsend = k_count.h_view();
       if (nsend > maxsend) grow_send_kokkos(nsend,1);
       nsend =
-        avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
+        avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
                                    k_exchange_sendlist,k_exchange_copylist,
                                    ExecutionSpaceFromDevice<DeviceType>::
                                    space,dim,lo,hi);
@@ -640,9 +714,7 @@ void CommKokkos::borders()
   }
 
   atomKK->sync(Host,ALL_MASK);
-  atomKK->modified(Host,ALL_MASK);
   k_sendlist.sync<LMPHostType>();
-  k_sendlist.modify<LMPHostType>();
   CommBrick::borders();
   k_sendlist.modify<LMPHostType>();
   atomKK->modified(Host,ALL_MASK);
@@ -659,11 +731,11 @@ struct BuildBorderListFunctor {
   int iswap,maxsendlist;
   int nfirst,nlast,dim;
   typename AT::t_int_2d sendlist;
-  typename AT::t_int_1d nsend;
+  typename AT::t_int_scalar nsend;
 
   BuildBorderListFunctor(typename AT::tdual_x_array _x,
                          typename AT::tdual_int_2d _sendlist,
-                         typename AT::tdual_int_1d _nsend,int _nfirst,
+                         typename AT::tdual_int_scalar _nsend,int _nfirst,
                          int _nlast, int _dim,
                          X_FLOAT _lo, X_FLOAT _hi, int _iswap,
                          int _maxsendlist):
@@ -684,7 +756,7 @@ struct BuildBorderListFunctor {
     for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
       if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
     }
-    const int my_store_pos = dev.team_scan(mysend,&nsend(0));
+    const int my_store_pos = dev.team_scan(mysend,&nsend());
 
     if (my_store_pos+mysend < maxsendlist) {
     mysend = my_store_pos;
@@ -713,7 +785,7 @@ void CommKokkos::borders_device() {
   AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
 
   ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
-  k_sendlist.modify<DeviceType>();
+  k_sendlist.sync<DeviceType>();
   atomKK->sync(exec_space,ALL_MASK);
 
   // do swaps over all 3 dimensions
@@ -763,37 +835,38 @@ void CommKokkos::borders_device() {
       if (sendflag) {
         if (!bordergroup || ineed >= 2) {
           if (style == SINGLE) {
-            typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
-            total_send.h_view(0) = 0;
-            if(exec_space == Device) {
-              total_send.template modify<DeviceType>();
-              total_send.template sync<LMPDeviceType>();
-            }
+            k_total_send.h_view() = 0;
+            k_total_send.template modify<LMPHostType>();
+            k_total_send.template sync<LMPDeviceType>();
 
             BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
-                total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
+                k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
             Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
             Kokkos::parallel_for(config,f);
 
-            total_send.template modify<DeviceType>();
-            total_send.template sync<LMPHostType>();
+            k_total_send.template modify<DeviceType>();
+            k_total_send.template sync<LMPHostType>();
+
+            k_sendlist.modify<DeviceType>();
+
+            if(k_total_send.h_view() >= maxsendlist[iswap]) {
+              grow_list(iswap,k_total_send.h_view());
+
+              k_total_send.h_view() = 0;
+              k_total_send.template modify<LMPHostType>();
+              k_total_send.template sync<LMPDeviceType>();
 
-            if(total_send.h_view(0) >= maxsendlist[iswap]) {
-              grow_list(iswap,total_send.h_view(0));
-              k_sendlist.modify<DeviceType>();
-              total_send.h_view(0) = 0;
-              if(exec_space == Device) {
-                total_send.template modify<LMPHostType>();
-                total_send.template sync<LMPDeviceType>();
-              }
               BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
-                  total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
+                  k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
               Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
               Kokkos::parallel_for(config,f);
-              total_send.template modify<DeviceType>();
-              total_send.template sync<LMPHostType>();
+
+              k_total_send.template modify<DeviceType>();
+              k_total_send.template sync<LMPHostType>();
+
+              k_sendlist.modify<DeviceType>();
             }
-            nsend = total_send.h_view(0);
+            nsend = k_total_send.h_view();
           } else {
             error->all(FLERR,"Required border comm not yet "
                        "implemented with Kokkos");
@@ -916,10 +989,11 @@ void CommKokkos::borders_device() {
 
   // reset global->local map
 
-  if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
   atomKK->modified(exec_space,ALL_MASK);
-  atomKK->sync(Host,TAG_MASK);
-  if (map_style) atom->map_set();
+  if (map_style) {
+    atomKK->sync(Host,TAG_MASK);
+    atom->map_set();
+  }
 }
 /* ----------------------------------------------------------------------
    realloc the size of the send buffer as needed with BUFFACTOR and bufextra
@@ -961,7 +1035,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
     buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
   }
   else {
-    k_buf_send = ArrayTypes<LMPDeviceType>::
+    k_buf_send = DAT::
       tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
     buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
   }
@@ -975,7 +1049,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
 {
   maxrecv = static_cast<int> (BUFFACTOR * n);
   int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
-  k_buf_recv = ArrayTypes<LMPDeviceType>::
+  k_buf_recv = DAT::
     tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
   buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
 }
@@ -988,6 +1062,11 @@ void CommKokkos::grow_list(int iswap, int n)
 {
   int size = static_cast<int> (BUFFACTOR * n);
 
+  if (exchange_comm_classic) { // force realloc on Host
+    k_sendlist.sync<LMPHostType>();
+    k_sendlist.modify<LMPHostType>();
+  }
+
   memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
 
   for(int i=0;i<maxswap;i++) {
@@ -1011,6 +1090,11 @@ void CommKokkos::grow_swap(int n)
   maxswap = n;
   int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
 
+  if (exchange_comm_classic) { // force realloc on Host
+    k_sendlist.sync<LMPHostType>();
+    k_sendlist.modify<LMPHostType>();
+  }
+
   memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
 
   memory->grow(maxsendlist,n,"comm:maxsendlist");
diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h
index a8ae973124f552167b19812ece2975b4d5072656..f137655cb8047fb1340d55283acc5e03ef59841f 100644
--- a/src/KOKKOS/comm_kokkos.h
+++ b/src/KOKKOS/comm_kokkos.h
@@ -25,15 +25,17 @@ class CommKokkos : public CommBrick {
 
   bool exchange_comm_classic;
   bool forward_comm_classic;
+  bool reverse_comm_classic;
   bool exchange_comm_on_host;
   bool forward_comm_on_host;
+  bool reverse_comm_on_host;
 
   CommKokkos(class LAMMPS *);
   ~CommKokkos();
   void init();
 
   void forward_comm(int dummy = 0);    // forward comm of atom coords
-  void reverse_comm();              // reverse comm of atom coords
+  void reverse_comm();                 // reverse comm of forces
   void exchange();                     // move atoms to new procs
   void borders();                      // setup list of atoms to comm
 
@@ -47,15 +49,17 @@ class CommKokkos : public CommBrick {
   void reverse_comm_dump(class Dump *);    // reverse comm from a Dump
 
   template<class DeviceType> void forward_comm_device(int dummy);
+  template<class DeviceType> void reverse_comm_device();
   template<class DeviceType> void forward_comm_pair_device(Pair *pair);
   template<class DeviceType> void exchange_device();
   template<class DeviceType> void borders_device();
 
  protected:
   DAT::tdual_int_2d k_sendlist;
+  DAT::tdual_int_scalar k_total_send;
   DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
   DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
-  DAT::tdual_int_1d k_count;
+  DAT::tdual_int_scalar k_count;
   //double *buf_send;                 // send buffer for all comm
   //double *buf_recv;                 // recv buffer for all comm
 
diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
index e54b53ae89dfae19de258c12234728625f6650fe..5d2f6a0438a400302785c70975beee07d4d4f32b 100644
--- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
@@ -63,6 +63,7 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
 
   nmax = nmax = m_cap = 0;
   allocated_flag = 0;
+  nprev = 4;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -158,15 +159,15 @@ void FixQEqReaxKokkos<DeviceType>::init_hist()
 {
   int i,j;
 
-  k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,5);
+  k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,nprev);
   d_s_hist = k_s_hist.template view<DeviceType>();
   h_s_hist = k_s_hist.h_view;
-  k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,5);
+  k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,nprev);
   d_t_hist = k_t_hist.template view<DeviceType>();
   h_t_hist = k_t_hist.h_view;
 
   for( i = 0; i < atom->nmax; i++ )
-    for( j = 0; j < 5; j++ )
+    for( j = 0; j < nprev; j++ )
       k_s_hist.h_view(i,j) = k_t_hist.h_view(i,j) = 0.0;
 
   k_s_hist.template modify<LMPHostType>();
@@ -334,11 +335,11 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array()
     d_d = k_d.template view<DeviceType>();
     h_d = k_d.h_view;
 
-    k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,5);
+    k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,nprev);
     d_s_hist = k_s_hist.template view<DeviceType>();
     h_s_hist = k_s_hist.h_view;
 
-    k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,5);
+    k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,nprev);
     d_t_hist = k_t_hist.template view<DeviceType>();
     h_t_hist = k_t_hist.h_view;
   }
@@ -368,7 +369,7 @@ void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const
     d_o[i] = 0.0;
     d_r[i] = 0.0;
     d_d[i] = 0.0;
-    //for( int j = 0; j < 5; j++ )
+    //for( int j = 0; j < nprev; j++ )
       //d_s_hist(i,j) = d_t_hist(i,j) = 0.0;
   }
 
@@ -1087,7 +1088,7 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q_item(int ii) const
   if (mask[i] & groupbit) {
     q(i) = d_s[i] - delta * d_t[i];
 
-    for (int k = 4; k > 0; --k) {
+    for (int k = nprev-1; k > 0; --k) {
       d_s_hist(i,k) = d_s_hist(i,k-1);
       d_t_hist(i,k) = d_t_hist(i,k-1);
     }
@@ -1173,7 +1174,7 @@ double FixQEqReaxKokkos<DeviceType>::memory_usage()
 {
   double bytes;
 
-  bytes = atom->nmax*5*2 * sizeof(F_FLOAT); // s_hist & t_hist
+  bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist
   bytes += atom->nmax*8 * sizeof(F_FLOAT); // storage
   bytes += n_cap*2 * sizeof(int); // matrix...
   bytes += m_cap * sizeof(int);
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index 072a802b54e7534a8139e6f9092c65354b2c1ce9..2b02624dcef30f29947183cb4f2a5acb68916779 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -123,8 +123,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   neighflag_qeq_set = 0;
   exchange_comm_classic = 0;
   forward_comm_classic = 0;
+  reverse_comm_classic = 0;
   exchange_comm_on_host = 0;
   forward_comm_on_host = 0;
+  reverse_comm_on_host = 0;
 
 #ifdef KILL_KOKKOS_ON_SIGSEGV
   signal(SIGSEGV, my_signal_handler);
@@ -158,8 +160,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
   neighflag_qeq_set = 0;
   int newtonflag = 0;
   double binsize = 0.0;
-  exchange_comm_classic = forward_comm_classic = 0;
-  exchange_comm_on_host = forward_comm_on_host = 0;
+  exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+  exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
 
   int iarg = 0;
   while (iarg < narg) {
@@ -200,13 +202,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
     } else if (strcmp(arg[iarg],"comm") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
       if (strcmp(arg[iarg+1],"no") == 0) {
-        exchange_comm_classic = forward_comm_classic = 1;
+        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
       } else if (strcmp(arg[iarg+1],"host") == 0) {
-        exchange_comm_classic = forward_comm_classic = 0;
-        exchange_comm_on_host = forward_comm_on_host = 1;
+        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+        exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
       } else if (strcmp(arg[iarg+1],"device") == 0) {
-        exchange_comm_classic = forward_comm_classic = 0;
-        exchange_comm_on_host = forward_comm_on_host = 0;
+        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+        exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
       } else error->all(FLERR,"Illegal package kokkos command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"comm/exchange") == 0) {
@@ -231,6 +233,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
         forward_comm_on_host = 0;
       } else error->all(FLERR,"Illegal package kokkos command");
       iarg += 2;
+    } else if (strcmp(arg[iarg],"comm/reverse") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
+      if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"host") == 0) {
+        reverse_comm_classic = 0;
+        reverse_comm_on_host = 1;
+      } else if (strcmp(arg[iarg+1],"device") == 0) {
+        reverse_comm_classic = 0;
+        reverse_comm_on_host = 0;
+      } else error->all(FLERR,"Illegal package kokkos command");
+      iarg += 2;
     } else error->all(FLERR,"Illegal package kokkos command");
   }
 
diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h
index 8e28b38cbf45b1e14c2a6e6da7fc59ff0f7b0920..7b7848f1f08ddfe3dbdbfcadfc13874e53218988 100644
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@@ -27,8 +27,10 @@ class KokkosLMP : protected Pointers {
   int neighflag_qeq_set;
   int exchange_comm_classic;
   int forward_comm_classic;
+  int reverse_comm_classic;
   int exchange_comm_on_host;
   int forward_comm_on_host;
+  int reverse_comm_on_host;
   int num_threads,ngpu;
   int numa;
   int auto_sync;
diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp
index c7e815928a185f7255d36749121d17615d546661..95ea105ad9df6d95f0d2ddba64154f1af64d0579 100644
--- a/src/KOKKOS/nbin_kokkos.cpp
+++ b/src/KOKKOS/nbin_kokkos.cpp
@@ -75,6 +75,10 @@ void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
     k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
     bincount = k_bincount.view<DeviceType>();
   }
+  if (nall > k_atom2bin.d_view.dimension_0()) {
+    k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall);
+    atom2bin = k_atom2bin.view<DeviceType>();
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -86,6 +90,10 @@ void NBinKokkos<DeviceType>::bin_atoms()
 {
   last_bin = update->ntimestep;
 
+  k_bins.template sync<DeviceType>();
+  k_bincount.template sync<DeviceType>();
+  k_atom2bin.template sync<DeviceType>();
+
   h_resize() = 1;
 
   while(h_resize() > 0) {
@@ -115,6 +123,10 @@ void NBinKokkos<DeviceType>::bin_atoms()
       c_bins = bins;
     }
   }
+
+  k_bins.template modify<DeviceType>();
+  k_bincount.template modify<DeviceType>();
+  k_atom2bin.template modify<DeviceType>();
 }
 
 /* ---------------------------------------------------------------------- */
@@ -125,6 +137,7 @@ void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
 {
   const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
 
+  atom2bin(i) = ibin;
   const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
   if(ac < bins.dimension_1()) {
     bins(ibin, ac) = i;
diff --git a/src/KOKKOS/nbin_kokkos.h b/src/KOKKOS/nbin_kokkos.h
index de3cf41d19c816952c6e4b1540b536d1baa42a77..bf2ccc59086ed0ed1a27dfd545bab85dbd327911 100644
--- a/src/KOKKOS/nbin_kokkos.h
+++ b/src/KOKKOS/nbin_kokkos.h
@@ -44,11 +44,13 @@ class NBinKokkos : public NBinStandard {
   int atoms_per_bin;
   DAT::tdual_int_1d k_bincount;
   DAT::tdual_int_2d k_bins;
+  DAT::tdual_int_1d k_atom2bin;
 
   typename AT::t_int_1d bincount;
   const typename AT::t_int_1d_const c_bincount;
   typename AT::t_int_2d bins;
   typename AT::t_int_2d_const c_bins;
+  typename AT::t_int_1d atom2bin;
   typename AT::t_int_scalar d_resize;
   typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
   typename AT::t_x_array_randomread x;
diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp
index caf2dfee561c49b3482750871157d52e36a75d99..04454e53cb302e2ad696809c034e1925a8aaa83d 100644
--- a/src/KOKKOS/neigh_list_kokkos.cpp
+++ b/src/KOKKOS/neigh_list_kokkos.cpp
@@ -49,15 +49,6 @@ void NeighListKokkos<Device>::grow(int nmax)
   d_neighbors =
     typename ArrayTypes<Device>::t_neighbors_2d("neighlist:neighbors",
                                                 maxatoms,maxneighs);
-
-  memory->sfree(firstneigh);
-  memory->sfree(firstdouble);
-
-  firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *),
-                                        "neighlist:firstneigh");
-  if (dnum)
-    firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *),
-                                              "neighlist:firstdouble");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index 9a40808052f138bfaaf4bfbef49515f5a08386df..f34b149864d1c24179fdf2f41be36143ed5f9807 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -310,9 +310,9 @@ void NeighborKokkos::build_kokkos(int topoflag)
   // build pairwise lists for all perpetual NPair/NeighList
   // grow() with nlocal/nall args so that only realloc if have to
 
-  atomKK->sync(Host,ALL_MASK);
   for (i = 0; i < npair_perpetual; i++) {
     m = plist[i];
+    if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
     if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
     neigh_pair[m]->build_setup();
     neigh_pair[m]->build(lists[m]);
diff --git a/src/KOKKOS/npair_copy_kokkos.cpp b/src/KOKKOS/npair_copy_kokkos.cpp
index 6835d8c1b54ee37d5be17f7d60cfc1dda1370cbd..8702816033799d00aa871d47012404326f831ce8 100644
--- a/src/KOKKOS/npair_copy_kokkos.cpp
+++ b/src/KOKKOS/npair_copy_kokkos.cpp
@@ -41,10 +41,7 @@ void NPairCopyKokkos<DeviceType>::build(NeighList *list)
   list->gnum = listcopy->gnum;
   list->ilist = listcopy->ilist;
   list->numneigh = listcopy->numneigh;
-  list->firstneigh = listcopy->firstneigh;
-  list->firstdouble = listcopy->firstdouble;
   list->ipage = listcopy->ipage;
-  list->dpage = listcopy->dpage;
 
   NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
   NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp
index b568bd5c93923e0e25fe6fa32fb913b9f602c651..d3cdcb0680efef3420d0e1a44f966a265759defc 100644
--- a/src/KOKKOS/npair_kokkos.cpp
+++ b/src/KOKKOS/npair_kokkos.cpp
@@ -73,6 +73,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info()
   atoms_per_bin = nbKK->atoms_per_bin;
   k_bincount = nbKK->k_bincount;
   k_bins = nbKK->k_bins;
+  k_atom2bin = nbKK->k_atom2bin;
 }
 
 /* ----------------------------------------------------------------------
@@ -88,13 +89,15 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
 
   int maxstencil = ns->get_maxstencil();
 
-  k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
+  if (maxstencil > k_stencil.dimension_0())
+    k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
   for (int k = 0; k < maxstencil; k++)
     k_stencil.h_view(k) = ns->stencil[k];
     k_stencil.modify<LMPHostType>();
     k_stencil.sync<DeviceType>();
   if (GHOST) {
-    k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
+    if (maxstencil > k_stencilxyz.dimension_0())
+      k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
     for (int k = 0; k < maxstencil; k++) {
       k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
       k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
@@ -122,6 +125,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
          k_cutneighsq.view<DeviceType>(),
          k_bincount.view<DeviceType>(),
          k_bins.view<DeviceType>(),
+         k_atom2bin.view<DeviceType>(),
          nstencil,
          k_stencil.view<DeviceType>(),
          k_stencilxyz.view<DeviceType>(),
@@ -164,8 +168,9 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
   k_ex_mol_group.sync<DeviceType>();
   k_ex_mol_bit.sync<DeviceType>();
   k_ex_mol_intra.sync<DeviceType>();
-  k_bincount.sync<DeviceType>(),
-  k_bins.sync<DeviceType>(),
+  k_bincount.sync<DeviceType>();
+  k_bins.sync<DeviceType>();
+  k_atom2bin.sync<DeviceType>();
   atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
 
   data.special_flag[0] = special_flag[0];
@@ -317,7 +322,7 @@ void NeighborKokkosExecute<DeviceType>::
   const X_FLOAT ztmp = x(i, 2);
   const int itype = type(i);
 
-  const int ibin = coord2bin(xtmp, ytmp, ztmp);
+  const int ibin = c_atom2bin(i);
 
   const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
     = d_stencil;
@@ -431,7 +436,7 @@ void NeighborKokkosExecute<DeviceType>::
   if(n > neigh_list.maxneighs) {
     resize() = 1;
 
-    if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
+    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
   }
 
   neigh_list.d_ilist(i) = i;
@@ -641,7 +646,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
   if(n > neigh_list.maxneighs) {
     resize() = 1;
 
-    if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
+    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
   }
   }
 }
@@ -678,7 +683,7 @@ void NeighborKokkosExecute<DeviceType>::
   // no molecular test when i = ghost atom
 
   if (i < nlocal) {
-    const int ibin = coord2bin(xtmp, ytmp, ztmp);
+    const int ibin = c_atom2bin(i);
     for (int k = 0; k < nstencil; k++) {
       const int jbin = ibin + stencil[k];
       for(int m = 0; m < c_bincount(jbin); m++) {
@@ -764,7 +769,7 @@ void NeighborKokkosExecute<DeviceType>::
   if(n > neigh_list.maxneighs) {
     resize() = 1;
 
-    if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
+    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
   }
   neigh_list.d_ilist(i) = i;
 }
diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h
index 517ea546fa1cff1e053fe84105a615316f8687f6..6c1c0e958b49dcd09ff7bb6b32efca67b2d3f6a6 100644
--- a/src/KOKKOS/npair_kokkos.h
+++ b/src/KOKKOS/npair_kokkos.h
@@ -105,6 +105,7 @@ class NPairKokkos : public NPair {
   int atoms_per_bin;
   DAT::tdual_int_1d k_bincount;
   DAT::tdual_int_2d k_bins;
+  DAT::tdual_int_1d k_atom2bin;
 
   // data from NStencil class
 
@@ -148,6 +149,8 @@ class NeighborKokkosExecute
   const typename AT::t_int_1d_const c_bincount;
   typename AT::t_int_2d bins;
   typename AT::t_int_2d_const c_bins;
+  const typename AT::t_int_1d atom2bin;
+  const typename AT::t_int_1d_const c_atom2bin;
 
 
   // data from NStencil class
@@ -190,6 +193,7 @@ class NeighborKokkosExecute
                         const typename AT::t_xfloat_2d_randomread &_cutneighsq,
                         const typename AT::t_int_1d &_bincount,
                         const typename AT::t_int_2d &_bins,
+                        const typename AT::t_int_1d &_atom2bin,
                         const int _nstencil,
                         const typename AT::t_int_1d &_d_stencil,
                         const typename AT::t_int_1d_3 &_d_stencilxyz,
@@ -224,6 +228,7 @@ class NeighborKokkosExecute
                         const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
     neigh_list(_neigh_list), cutneighsq(_cutneighsq),
     bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
+    atom2bin(_atom2bin),c_atom2bin(_atom2bin),
     nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
     nlocal(_nlocal),
     x(_x),type(_type),mask(_mask),molecule(_molecule),
@@ -281,38 +286,6 @@ class NeighborKokkosExecute
   void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
 #endif
 
-  KOKKOS_INLINE_FUNCTION
-  int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
-  {
-    int ix,iy,iz;
-
-    if (x >= bboxhi[0])
-      ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
-    else if (x >= bboxlo[0]) {
-      ix = static_cast<int> ((x-bboxlo[0])*bininvx);
-      ix = MIN(ix,nbinx-1);
-    } else
-      ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
-
-    if (y >= bboxhi[1])
-      iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
-    else if (y >= bboxlo[1]) {
-      iy = static_cast<int> ((y-bboxlo[1])*bininvy);
-      iy = MIN(iy,nbiny-1);
-    } else
-      iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
-
-    if (z >= bboxhi[2])
-      iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
-    else if (z >= bboxlo[2]) {
-      iz = static_cast<int> ((z-bboxlo[2])*bininvz);
-      iz = MIN(iz,nbinz-1);
-    } else
-      iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
-
-    return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
-  }
-
   KOKKOS_INLINE_FUNCTION
   int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
   {
diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp
index b73e54e33fb469dd4ffbaefa8314532eadbe2559..9f447bda1a8038874064a54035e0c9de569c0a0c 100644
--- a/src/KOKKOS/npair_ssa_kokkos.cpp
+++ b/src/KOKKOS/npair_ssa_kokkos.cpp
@@ -70,6 +70,7 @@ void NPairSSAKokkos<DeviceType>::copy_neighbor_info()
   k_ex2_bit = neighborKK->k_ex2_bit;
   k_ex_mol_group = neighborKK->k_ex_mol_group;
   k_ex_mol_bit = neighborKK->k_ex_mol_bit;
+  k_ex_mol_intra = neighborKK->k_ex_mol_intra;
 }
 
 /* ----------------------------------------------------------------------
@@ -217,8 +218,12 @@ int NPairSSAKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
 
   if (nex_mol) {
     for (m = 0; m < nex_mol; m++)
-      if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) &&
-          molecule(i) == molecule(j)) return 1;
+      if (ex_mol_intra[m]) { // intra-chain: exclude i-j pair if on same molecule
+        if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] &&
+            molecule[i] == molecule[j]) return 1;
+      } else                 // exclude i-j pair if on different molecules
+        if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] &&
+            molecule[i] != molecule[j]) return 1;
   }
 
   return 0;
@@ -418,6 +423,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
          nex_mol,
          k_ex_mol_group.view<DeviceType>(),
          k_ex_mol_bit.view<DeviceType>(),
+         k_ex_mol_intra.view<DeviceType>(),
          bboxhi,bboxlo,
          domain->xperiodic,domain->yperiodic,domain->zperiodic,
          domain->xprd_half,domain->yprd_half,domain->zprd_half);
@@ -432,6 +438,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
   k_ex2_bit.sync<DeviceType>();
   k_ex_mol_group.sync<DeviceType>();
   k_ex_mol_bit.sync<DeviceType>();
+  k_ex_mol_intra.sync<DeviceType>();
   k_bincount.sync<DeviceType>();
   k_bins.sync<DeviceType>();
   k_gbincount.sync<DeviceType>();
diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h
index 98046feba88b4fb239004228cfa940f74946ca2b..17a23b2811b221db561949e7313635b48f4f0a90 100644
--- a/src/KOKKOS/npair_ssa_kokkos.h
+++ b/src/KOKKOS/npair_ssa_kokkos.h
@@ -76,6 +76,7 @@ class NPairSSAKokkos : public NPair {
   DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
   DAT::tdual_int_1d k_ex_mol_group;
   DAT::tdual_int_1d k_ex_mol_bit;
+  DAT::tdual_int_1d k_ex_mol_intra;
 
   // data from NBinSSA class
 
@@ -123,6 +124,7 @@ class NPairSSAKokkosExecute
   const int nex_mol;
   const typename AT::t_int_1d_const ex_mol_group;
   const typename AT::t_int_1d_const ex_mol_bit;
+  const typename AT::t_int_1d_const ex_mol_intra;
 
   // data from NBinSSA class
 
@@ -233,6 +235,7 @@ class NPairSSAKokkosExecute
         const int & _nex_mol,
         const typename AT::t_int_1d_const & _ex_mol_group,
         const typename AT::t_int_1d_const & _ex_mol_bit,
+        const typename AT::t_int_1d_const & _ex_mol_intra,
         const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
         const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
         const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
@@ -266,6 +269,7 @@ class NPairSSAKokkosExecute
     ex1_group(_ex1_group),ex2_group(_ex2_group),
     ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
     ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
+    ex_mol_intra(_ex_mol_intra),
     xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
     xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
 
diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp
index d95cd8f8aefd68b5699ee89046352a30771b3db9..d5f83f45373d9e4d32d6032f0fe343489077e885 100644
--- a/src/KOKKOS/pair_reaxc_kokkos.cpp
+++ b/src/KOKKOS/pair_reaxc_kokkos.cpp
@@ -131,6 +131,8 @@ template<class DeviceType>
 void PairReaxCKokkos<DeviceType>::init_style()
 {
   PairReaxC::init_style();
+  if (fix_reax) modify->delete_fix("REAXC"); // not needed in the Kokkos version
+  fix_reax = NULL;
 
   // irequest = neigh request made by parent class
 
@@ -555,8 +557,8 @@ void PairReaxCKokkos<DeviceType>::Deallocate_Lookup_Tables()
 
   ntypes = atom->ntypes;
 
-  for( i = 0; i < ntypes; ++i ) {
-    for( j = i; j < ntypes; ++j )
+  for( i = 0; i <= ntypes; ++i ) {
+    for( j = i; j <= ntypes; ++j )
       if( LR[i][j].n ) {
         sfree( LR[i][j].y, "LR[i,j].y" );
         sfree( LR[i][j].H, "LR[i,j].H" );
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index e4a3f857d3d24829055c6ce5dc6cfd1ff1a02d52..adec5ff1bd9d7520c2c469638d497759375c362a 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -294,6 +294,7 @@ void VerletKokkos::run(int n)
   int n_pre_exchange = modify->n_pre_exchange;
   int n_pre_neighbor = modify->n_pre_neighbor;
   int n_pre_force = modify->n_pre_force;
+  int n_pre_reverse = modify->n_pre_reverse;
   int n_post_force = modify->n_post_force;
   int n_end_of_step = modify->n_end_of_step;
 
@@ -304,9 +305,9 @@ void VerletKokkos::run(int n)
 
   f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());
 
-  static double time = 0.0;
   atomKK->sync(Device,ALL_MASK);
-  Kokkos::Impl::Timer ktimer;
+  //static double time = 0.0;
+  //Kokkos::Impl::Timer ktimer;
 
   timer->init_timeout();
   for (int i = 0; i < n; i++) {
@@ -320,10 +321,10 @@ void VerletKokkos::run(int n)
 
     // initial time integration
 
-    ktimer.reset();
+    //ktimer.reset();
     timer->stamp();
     modify->initial_integrate(vflag);
-    time += ktimer.seconds();
+    //time += ktimer.seconds();
     if (n_post_integrate) modify->post_integrate();
     timer->stamp(Timer::MODIFY);
 
@@ -523,11 +524,18 @@ void VerletKokkos::run(int n)
       atomKK->k_f.modify<LMPDeviceType>();
     }
 
+    if (n_pre_reverse) {
+      modify->pre_reverse(eflag,vflag);
+      timer->stamp(Timer::MODIFY);
+    }
 
     // reverse communication of forces
 
-    if (force->newton) comm->reverse_comm();
-    timer->stamp(Timer::COMM);
+    if (force->newton) {
+      Kokkos::fence();
+      comm->reverse_comm();
+      timer->stamp(Timer::COMM);
+    }
 
     // force modifications, final time integration, diagnostics
 
diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp
index 4cfb9b72671639a133cc3ddc05b356f7cb9616d9..7df8ebac6874b9ca02a4171b8e2e14666ad0d9b4 100644
--- a/src/KSPACE/pair_buck_long_coul_long.cpp
+++ b/src/KSPACE/pair_buck_long_coul_long.cpp
@@ -233,7 +233,8 @@ void PairBuckLongCoulLong::init_style()
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
     error->all(FLERR,
-        "Invoking coulombic in pair style buck/long/coul/long requires atom attribute q");
+               "Invoking coulombic in pair style buck/long/coul/long "
+               "requires atom attribute q");
 
   // ensure use of KSpace long-range solver, set two g_ewalds
 
@@ -258,51 +259,25 @@ void PairBuckLongCoulLong::init_style()
 
   if (force->kspace->neighrequest_flag) {
     int irequest;
+    int respa = 0;
 
     if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-      int respa = 0;
       if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
       if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    }
 
-      if (respa == 0) irequest = neighbor->request(this,instance_me);
-      else if (respa == 1) {
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 1;
-        neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 3;
-        neighbor->requests[irequest]->respaouter = 1;
-      } else {
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 1;
-        neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 2;
-        neighbor->requests[irequest]->respamiddle = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 3;
-        neighbor->requests[irequest]->respaouter = 1;
-      }
+    irequest = neighbor->request(this,instance_me);
 
-    } else irequest = neighbor->request(this,instance_me);
+    if (respa >= 1) {
+      neighbor->requests[irequest]->respaouter = 1;
+      neighbor->requests[irequest]->respainner = 1;
+    }
+    if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
   }
 
   cut_coulsq = cut_coul * cut_coul;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairBuckLongCoulLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
@@ -651,14 +626,14 @@ void PairBuckLongCoulLong::compute_inner()
   double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
-  ineighn = (ineigh = listinner->ilist) + listinner->inum;
+  ineighn = (ineigh = list->ilist_inner) + list->inum_inner;
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
-    jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
@@ -740,7 +715,7 @@ void PairBuckLongCoulLong::compute_middle()
   double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
-  ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum;
+  ineighn = (ineigh = list->ilist_middle)+list->inum_middle;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
@@ -748,7 +723,7 @@ void PairBuckLongCoulLong::compute_middle()
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
-    jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
@@ -839,7 +814,7 @@ void PairBuckLongCoulLong::compute_outer(int eflag, int vflag)
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
-  ineighn = (ineigh = listouter->ilist)+listouter->inum;
+  ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
@@ -849,7 +824,7 @@ void PairBuckLongCoulLong::compute_outer(int eflag, int vflag)
     buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
-    jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
diff --git a/src/KSPACE/pair_buck_long_coul_long.h b/src/KSPACE/pair_buck_long_coul_long.h
index cf752a09b0604c4c676b6d309b7ed3c0a248e089..40fe7c417fa614f3db4133d00e6bacd380145bfe 100644
--- a/src/KSPACE/pair_buck_long_coul_long.h
+++ b/src/KSPACE/pair_buck_long_coul_long.h
@@ -35,7 +35,6 @@ class PairBuckLongCoulLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/KSPACE/pair_lj_charmm_coul_long.cpp b/src/KSPACE/pair_lj_charmm_coul_long.cpp
index 7b3fdd4b5f6f3c7987c16a5687ec1c274aba5c74..daa493cef06e6327966c9dfb490a33c071ff8a22 100644
--- a/src/KSPACE/pair_lj_charmm_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmm_coul_long.cpp
@@ -242,10 +242,10 @@ void PairLJCharmmCoulLong::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   // loop over neighbors of my atoms
 
@@ -320,10 +320,10 @@ void PairLJCharmmCoulLong::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   // loop over neighbors of my atoms
 
@@ -417,10 +417,10 @@ void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
@@ -687,36 +687,23 @@ void PairLJCharmmCoulLong::init_style()
     error->all(FLERR,
                "Pair style lj/charmm/coul/long requires atom attribute q");
 
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
+  int respa = 0;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-    int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+  }
 
-    if (respa == 0) irequest = neighbor->request(this,instance_me);
-    else if (respa == 1) {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    } else {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 2;
-      neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    }
+  irequest = neighbor->request(this,instance_me);
 
-  } else irequest = neighbor->request(this,instance_me);
+  if (respa >= 1) {
+    neighbor->requests[irequest]->respaouter = 1;
+    neighbor->requests[irequest]->respainner = 1;
+  }
+  if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
 
   // require cut_lj_inner < cut_lj
 
@@ -767,19 +754,6 @@ void PairLJCharmmCoulLong::init_style()
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCharmmCoulLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/KSPACE/pair_lj_charmm_coul_long.h b/src/KSPACE/pair_lj_charmm_coul_long.h
index 1544f3bc1420e100be265960debd22e157e5d84e..95c6d0d1c72aa179cf4d58001cb39f79fcf08322 100644
--- a/src/KSPACE/pair_lj_charmm_coul_long.h
+++ b/src/KSPACE/pair_lj_charmm_coul_long.h
@@ -33,7 +33,6 @@ class PairLJCharmmCoulLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/KSPACE/pair_lj_charmm_coul_msm.cpp b/src/KSPACE/pair_lj_charmm_coul_msm.cpp
index 76c9ef0cc7ba953b995cc5141e21e9f931ba79b0..00617c0bf2331a51ba721cc9a8ba05ba5209f2ec 100644
--- a/src/KSPACE/pair_lj_charmm_coul_msm.cpp
+++ b/src/KSPACE/pair_lj_charmm_coul_msm.cpp
@@ -278,10 +278,10 @@ void PairLJCharmmCoulMSM::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
index 30d8ab64b60f086c89d328402aa0b2ba8c45c282..859f4217631c7fe17b5fb29b636da2724e9e0cf8 100644
--- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
@@ -274,10 +274,10 @@ void PairLJCharmmfswCoulLong::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -359,10 +359,10 @@ void PairLJCharmmfswCoulLong::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -465,10 +465,10 @@ void PairLJCharmmfswCoulLong::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -824,19 +824,6 @@ void PairLJCharmmfswCoulLong::init_style()
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCharmmfswCoulLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.h b/src/KSPACE/pair_lj_charmmfsw_coul_long.h
index 650a908e4851dfe7ddb2102fa6b69961cdc727dc..135b82ea72160224c01baf9213ed97659a02be39 100644
--- a/src/KSPACE/pair_lj_charmmfsw_coul_long.h
+++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.h
@@ -33,7 +33,6 @@ class PairLJCharmmfswCoulLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/KSPACE/pair_lj_cut_coul_long.cpp b/src/KSPACE/pair_lj_cut_coul_long.cpp
index f8be9fdb79957699414f0d2fe9fbf44368f48807..3096df2b01626e1fbfd255c176ed0803bf07817c 100644
--- a/src/KSPACE/pair_lj_cut_coul_long.cpp
+++ b/src/KSPACE/pair_lj_cut_coul_long.cpp
@@ -224,10 +224,10 @@ void PairLJCutCoulLong::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -309,10 +309,10 @@ void PairLJCutCoulLong::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -410,10 +410,10 @@ void PairLJCutCoulLong::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -656,36 +656,23 @@ void PairLJCutCoulLong::init_style()
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
+  int respa = 0;  // 0 = regular, 1 = inner/outer, 2 = inner/middle/outer
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-    int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+  }
 
-    if (respa == 0) irequest = neighbor->request(this,instance_me);
-    else if (respa == 1) {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    } else {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 2;
-      neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    }
+  irequest = neighbor->request(this,instance_me);  // one request in all cases
 
-  } else irequest = neighbor->request(this,instance_me);
+  if (respa >= 1) {
+    neighbor->requests[irequest]->respaouter = 1;
+    neighbor->requests[irequest]->respainner = 1;
+  }
+  if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
 
   cut_coulsq = cut_coul * cut_coul;
 
@@ -707,19 +694,6 @@ void PairLJCutCoulLong::init_style()
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCutCoulLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/KSPACE/pair_lj_cut_coul_long.h b/src/KSPACE/pair_lj_cut_coul_long.h
index 886542d07550d15d6fdf9b28b8b135feae4f04ce..e6f97c088dab552386ebaa7ec74d7917fb865c60 100644
--- a/src/KSPACE/pair_lj_cut_coul_long.h
+++ b/src/KSPACE/pair_lj_cut_coul_long.h
@@ -33,7 +33,6 @@ class PairLJCutCoulLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/KSPACE/pair_lj_cut_coul_msm.cpp b/src/KSPACE/pair_lj_cut_coul_msm.cpp
index e3b3f58fcbf83355b8a16d9c4c711022ca881684..9f901db9fcfbb2375734877b0a918368cd49d5cb 100644
--- a/src/KSPACE/pair_lj_cut_coul_msm.cpp
+++ b/src/KSPACE/pair_lj_cut_coul_msm.cpp
@@ -265,10 +265,10 @@ void PairLJCutCoulMSM::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
diff --git a/src/KSPACE/pair_lj_long_coul_long.cpp b/src/KSPACE/pair_lj_long_coul_long.cpp
index 7c6adfcb41609748e41e026769f5a38de178cab4..61b69011f1ac5ae04ca191bd0423f5a4296d7e27 100644
--- a/src/KSPACE/pair_lj_long_coul_long.cpp
+++ b/src/KSPACE/pair_lj_long_coul_long.cpp
@@ -253,51 +253,25 @@ void PairLJLongCoulLong::init_style()
 
   if (force->kspace->neighrequest_flag) {
     int irequest;
-
+    int respa = 0;  // 0 = regular, 1 = inner/outer, 2 = inner/middle/outer
+
     if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-      int respa = 0;
       if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
       if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
-
-      if (respa == 0) irequest = neighbor->request(this,instance_me);
-      else if (respa == 1) {
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 1;
-        neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 3;
-        neighbor->requests[irequest]->respaouter = 1;
-      } else {
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 1;
-        neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 2;
-        neighbor->requests[irequest]->respamiddle = 1;
-        irequest = neighbor->request(this,instance_me);
-        neighbor->requests[irequest]->id = 3;
-        neighbor->requests[irequest]->respaouter = 1;
-      }
-
-    } else irequest = neighbor->request(this,instance_me);
+    }
+
+    irequest = neighbor->request(this,instance_me);  // one request in all cases
+
+    if (respa >= 1) {
+      neighbor->requests[irequest]->respaouter = 1;
+      neighbor->requests[irequest]->respainner = 1;
+    }
+    if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
   }
 
   cut_coulsq = cut_coul * cut_coul;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJLongCoulLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
@@ -649,13 +623,13 @@ void PairLJLongCoulLong::compute_inner()
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
-  ineighn = (ineigh = listinner->ilist)+listinner->inum;
+  ineighn = (ineigh = list->ilist_inner)+list->inum_inner;
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
-    jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i];
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
@@ -736,7 +710,7 @@ void PairLJLongCoulLong::compute_middle()
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
-  ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum;
+  ineighn = (ineigh = list->ilist_middle)+list->inum_middle;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
@@ -744,7 +718,7 @@ void PairLJLongCoulLong::compute_middle()
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
-    jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i];
 
     for (; jneigh<jneighn; ++jneigh) {
       j = *jneigh;
@@ -833,7 +807,7 @@ void PairLJLongCoulLong::compute_outer(int eflag, int vflag)
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
-  ineighn = (ineigh = listouter->ilist)+listouter->inum;
+  ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
@@ -842,7 +816,7 @@ void PairLJLongCoulLong::compute_outer(int eflag, int vflag)
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
-    jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
diff --git a/src/KSPACE/pair_lj_long_coul_long.h b/src/KSPACE/pair_lj_long_coul_long.h
index 22704c79fae39f32039a9d58b053dbf02c78274f..f11c81e28915fe19adc7c6cf13391493a5d54c03 100644
--- a/src/KSPACE/pair_lj_long_coul_long.h
+++ b/src/KSPACE/pair_lj_long_coul_long.h
@@ -34,7 +34,6 @@ class PairLJLongCoulLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp
index 1dc1ca1cb451f0a09fa30e7925efae444bacd98f..1b0eb12e2c256423568c1984cdfb2742dfcd9fd1 100644
--- a/src/KSPACE/pair_lj_long_tip4p_long.cpp
+++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp
@@ -516,10 +516,10 @@ void PairLJLongTIP4PLong::compute_inner()
   int ni;
   double *lj1i, *lj2i;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   // loop over neighbors of my atoms
 
@@ -769,10 +769,10 @@ void PairLJLongTIP4PLong::compute_middle()
   int ni;
   double  *lj1i, *lj2i;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   // loop over neighbors of my atoms
 
@@ -1049,10 +1049,10 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag)
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
diff --git a/src/MAKE/MACHINES/Makefile.cori2 b/src/MAKE/MACHINES/Makefile.cori2
index a367d540808add4aeb036aa6a2cabd0745fc98a0..45e1ab1f8a3aed152bd96ce567b858d0148eaee8 100755
--- a/src/MAKE/MACHINES/Makefile.cori2
+++ b/src/MAKE/MACHINES/Makefile.cori2
@@ -15,13 +15,14 @@ SHELL = /bin/sh
 
 CC =		CC
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG -DLMP_INTEL_NO_TBB \
+                $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		CC
-LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
+LINKFLAGS =	-qopenmp $(OPTFLAGS)
 LIB =           
 SIZE =		size
 
diff --git a/src/MAKE/MACHINES/Makefile.icex b/src/MAKE/MACHINES/Makefile.icex
deleted file mode 100644
index e9e0c30857f4b2be1767d3a8706fc4e73786c649..0000000000000000000000000000000000000000
--- a/src/MAKE/MACHINES/Makefile.icex
+++ /dev/null
@@ -1,119 +0,0 @@
-# mpi = MPI with its default compiler
-
-SHELL = /bin/sh
-
-# ---------------------------------------------------------------------
-# compiler/linker settings
-# specify flags and libraries needed for your compiler
-
-KOKKOS_PATH=/home/stefan/projects/lammps-mine/lib/kokkos
-CC =		mpicxx
-CCFLAGS =	-g -O3 -Wall -Wextra -frounding-math -fsignaling-nans -march=native
-SHFLAGS =	-shared -MD -mcmodel=medium -fpic -fPIC
-DEPFLAGS =	-M
-
-LINK =		mpicxx
-LINKFLAGS =	-g -O
-LIB = 
-SIZE =		size
-
-ARCHIVE =	ar
-ARFLAGS =	-rc
-SHLIBFLAGS =	-shared
-KOKKOS_DEVICES = Cuda,OpenMP
-KOKKOS_ARCH =   Pascal61
-
-# ---------------------------------------------------------------------
-# LAMMPS-specific settings, all OPTIONAL
-# specify settings for LAMMPS features you will use
-# if you change any -D setting, do full re-compile after "make clean"
-
-# LAMMPS ifdef settings
-# see possible settings in Section 2.2 (step 4) of manual
-
-LMP_INC =	-DLAMMPS_GZIP
-#LMP_INC +=	-DLAMMPS_JPEG
-LMP_INC +=	-DLAMMPS_MEMALIGN=64
-
-# MPI library
-# see discussion in Section 2.2 (step 5) of manual
-# MPI wrapper compiler/linker can provide this info
-# can point to dummy MPI library in src/STUBS as in Makefile.serial
-# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
-# INC = path for mpi.h, MPI compiler settings
-# PATH = path for MPI library
-# LIB = name of MPI library
-
-MPI_INC =       -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
-MPI_PATH = 
-MPI_LIB =	
-
-# FFT library
-# see discussion in Section 2.2 (step 6) of manual
-# can be left blank to use provided KISS FFT library
-# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
-# PATH = path for FFT library
-# LIB = name of FFT library
-
-FFT_INC =    	
-FFT_PATH = 
-FFT_LIB =	
-
-# JPEG and/or PNG library
-# see discussion in Section 2.2 (step 7) of manual
-# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
-# INC = path(s) for jpeglib.h and/or png.h
-# PATH = path(s) for JPEG library and/or PNG library
-# LIB = name(s) of JPEG library and/or PNG library
-
-JPG_INC =       
-JPG_PATH = 	
-JPG_LIB =	
-
-# ---------------------------------------------------------------------
-# build rules and dependencies
-# do not edit this section
-
-include	Makefile.package.settings
-include	Makefile.package
-
-EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
-EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
-EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
-EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
-EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
-
-# Path to src files
-
-vpath %.cpp ..
-vpath %.h ..
-
-# Link target
-
-$(EXE):	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
-	$(SIZE) $(EXE)
-
-# Library targets
-
-lib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
-
-shlib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
-        $(OBJ) $(EXTRA_LIB) $(LIB)
-
-# Compilation rules
-
-%.o:%.cpp
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
-
-# Individual dependencies
-
-depend : fastdep.exe $(SRC)
-	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
-
-fastdep.exe: ../DEPEND/fastdep.c
-	cc -O -o $@ $<
-
-sinclude .depend
diff --git a/src/MAKE/OPTIONS/Makefile.intel_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_coprocessor
index a717be93ff54387708aa8f1080ddab40d5ec9a1f..75e4d89170c5f2d2431dfe1151bd1b23b3a3bd14 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_coprocessor
+++ b/src/MAKE/OPTIONS/Makefile.intel_coprocessor
@@ -10,7 +10,7 @@ CC =		mpiicpc
 MIC_OPT =       -qoffload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\""
 CCFLAGS =	-g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
                 -xHost -fno-alias -ansi-alias -restrict -DLMP_INTEL_USELRT \
-                -qoverride-limits $(MIC_OPT)
+                -qoverride-limits $(MIC_OPT) -DLMP_USE_MKL_RNG
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu
old mode 100755
new mode 100644
index b7db0645740a1fa62db80bc4e3637500af7f9fce..41d0f959feeba082640a3da8aa883421ee786cdd
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu
@@ -8,14 +8,14 @@ SHELL = /bin/sh
 
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS)
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		mpiicpc
-LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
-LIB =           -ltbbmalloc -ltbbmalloc_proxy
+LINKFLAGS =	-qopenmp $(OPTFLAGS)
+LIB =           -ltbbmalloc
 SIZE =		size
 
 ARCHIVE =	ar
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
index 8a45b781f87d64c9ce36ee0ae9024650c17d9ccd..ef514f43c68f4401e62113cfc4c5ff34a7565175 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@@ -8,8 +8,8 @@ SHELL = /bin/sh
 
 CC =		mpiicpc 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
index 40d517bce41bfb5a720f8badfe06b54993bfbe01..e4dc74d79b353d6984b7af74892a9455cbb79b37 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
@@ -8,14 +8,14 @@ SHELL = /bin/sh
 
 CC =		mpicxx -cxx=icc
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		mpicxx -cxx=icc
-LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
-LIB =           
+LINKFLAGS =	-qopenmp $(OPTFLAGS)
+LIB =           -ltbbmalloc
 SIZE =		size
 
 ARCHIVE =	ar
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
index fe1be99e5881390a7b919a505ba75dc352685835..457a64b223a1fbd0c4859720ca939d3e99e96d5c 100644
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
@@ -9,14 +9,14 @@ SHELL = /bin/sh
 export OMPI_CXX = icc
 CC =		mpicxx
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_USELRT
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		mpicxx
-LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
-LIB =           -ltbbmalloc -ltbbmalloc_proxy
+LINKFLAGS =	-qopenmp $(OPTFLAGS)
+LIB =           -ltbbmalloc
 SIZE =		size
 
 ARCHIVE =	ar
diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
deleted file mode 100644
index 406e98b36d542c23e6d1d79ab5bc6d83d45f009e..0000000000000000000000000000000000000000
--- a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
+++ /dev/null
@@ -1,123 +0,0 @@
-# intel_phi = USER-INTEL with Phi x200 (KNL) offload support,Intel MPI,MKL FFT
-
-SHELL = /bin/sh
-
-# ---------------------------------------------------------------------
-# compiler/linker settings
-# specify flags and libraries needed for your compiler
-
-CC =		mpiicpc 
-MIC_OPT =       -qoffload-arch=mic-avx512 -fp-model fast=2
-CCFLAGS =	-O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
-                -xHost -fno-alias -ansi-alias -restrict \
-                -qoverride-limits $(MIC_OPT) -DLMP_INTEL_USELRT
-SHFLAGS =	-fPIC
-DEPFLAGS =	-M
-
-LINK =		mpiicpc
-LINKFLAGS =	-g -O3 -xHost -qopenmp -qoffload $(MIC_OPT)
-LIB =           -ltbbmalloc
-SIZE =		size
-
-ARCHIVE =	ar
-ARFLAGS =	-rc
-SHLIBFLAGS =	-shared
-
-# ---------------------------------------------------------------------
-# LAMMPS-specific settings, all OPTIONAL
-# specify settings for LAMMPS features you will use
-# if you change any -D setting, do full re-compile after "make clean"
-
-# LAMMPS ifdef settings
-# see possible settings in Section 2.2 (step 4) of manual
-
-LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
-
-# MPI library
-# see discussion in Section 2.2 (step 5) of manual
-# MPI wrapper compiler/linker can provide this info
-# can point to dummy MPI library in src/STUBS as in Makefile.serial
-# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
-# INC = path for mpi.h, MPI compiler settings
-# PATH = path for MPI library
-# LIB = name of MPI library
-
-MPI_INC =       -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
-MPI_PATH = 
-MPI_LIB =
-
-# FFT library
-# see discussion in Section 2.2 (step 6) of manaul
-# can be left blank to use provided KISS FFT library
-# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
-# PATH = path for FFT library
-# LIB = name of FFT library
-
-FFT_INC =      -DFFT_MKL -DFFT_SINGLE
-FFT_PATH = 
-FFT_LIB =	-L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
-
-# JPEG and/or PNG library
-# see discussion in Section 2.2 (step 7) of manual
-# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
-# INC = path(s) for jpeglib.h and/or png.h
-# PATH = path(s) for JPEG library and/or PNG library
-# LIB = name(s) of JPEG library and/or PNG library
-
-JPG_INC =       
-JPG_PATH = 	
-JPG_LIB =	-ljpeg
-
-# ---------------------------------------------------------------------
-# build rules and dependencies
-# do not edit this section
-
-include	Makefile.package.settings
-include	Makefile.package
-
-EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
-EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
-EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
-EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
-EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
-
-# Path to src files
-
-vpath %.cpp ..
-vpath %.h ..
-
-# Link target
-
-$(EXE):	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
-	$(SIZE) $(EXE)
-
-# Library targets
-
-lib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
-
-shlib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
-        $(OBJ) $(EXTRA_LIB) $(LIB)
-
-# Compilation rules
-
-%.o:%.cpp $(EXTRA_CPP_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
-
-%.d:%.cpp $(EXTRA_CPP_DEPENDS)
-	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
-
-%.o:%.cu $(EXTRA_CPP_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
-
-# Individual dependencies
-
-depend : fastdep.exe $(SRC)
-	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
-
-fastdep.exe: ../DEPEND/fastdep.c
-	cc -O -o $@ $<
-
-sinclude .depend
diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl
index 881c51f0e4de91654743c024f0af146459206e4a..8e266a4fce859e58e661080e6bd446489a7dec1d 100644
--- a/src/MAKE/OPTIONS/Makefile.knl
+++ b/src/MAKE/OPTIONS/Makefile.knl
@@ -8,13 +8,13 @@ SHELL = /bin/sh
 
 CC =		mpiicpc
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-CCFLAGS =	-qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-                -fno-alias -ansi-alias -restrict $(OPTFLAGS)
+CCFLAGS =	-qopenmp -qno-offload -fno-alias -ansi-alias -restrict \
+                -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS)
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
 
 LINK =		mpiicpc
-LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
+LINKFLAGS =	-qopenmp $(OPTFLAGS)
 LIB =           -ltbbmalloc
 SIZE =		size
 
diff --git a/src/MOLECULE/angle_table.cpp b/src/MOLECULE/angle_table.cpp
index 4d9007adb7263014f9ed1c795e7bf40afa274683..6e145efa10850909b288d8e0941ecc27c641a979 100644
--- a/src/MOLECULE/angle_table.cpp
+++ b/src/MOLECULE/angle_table.cpp
@@ -609,18 +609,22 @@ double AngleTable::splint(double *xa, double *ya, double *y2a, int n, double x)
 
 void AngleTable::uf_lookup(int type, double x, double &u, double &f)
 {
-  int itable;
+  if (!ISFINITE(x)) {
+    error->one(FLERR,"Illegal angle in angle style table");
+  }
+
   double fraction,a,b;
+  const Table *tb = &tables[tabindex[type]];
+  int itable = static_cast<int> (x * tb->invdelta);
 
-  Table *tb = &tables[tabindex[type]];
+  if (itable < 0) itable = 0;
+  if (itable >= tablength) itable = tablength-1;
 
   if (tabstyle == LINEAR) {
-    itable = static_cast<int> ( x * tb->invdelta);
     fraction = (x - tb->ang[itable]) * tb->invdelta;
     u = tb->e[itable] + fraction*tb->de[itable];
     f = tb->f[itable] + fraction*tb->df[itable];
   } else if (tabstyle == SPLINE) {
-    itable = static_cast<int> ( x * tb->invdelta);
     fraction = (x - tb->ang[itable]) * tb->invdelta;
 
     b = (x - tb->ang[itable]) * tb->invdelta;
@@ -640,17 +644,21 @@ void AngleTable::uf_lookup(int type, double x, double &u, double &f)
 
 void AngleTable::u_lookup(int type, double x, double &u)
 {
-  int itable;
+  if (!ISFINITE(x)) {
+    error->one(FLERR,"Illegal angle in angle style table");
+  }
+
   double fraction,a,b;
+  const Table *tb = &tables[tabindex[type]];
+  int itable = static_cast<int> ( x * tb->invdelta);
 
-  Table *tb = &tables[tabindex[type]];
+  if (itable < 0) itable = 0;
+  if (itable >= tablength) itable = tablength-1;
 
   if (tabstyle == LINEAR) {
-    itable = static_cast<int> ( x * tb->invdelta);
     fraction = (x - tb->ang[itable]) * tb->invdelta;
     u = tb->e[itable] + fraction*tb->de[itable];
   } else if (tabstyle == SPLINE) {
-    itable = static_cast<int> ( x * tb->invdelta);
     fraction = (x - tb->ang[itable]) * tb->invdelta;
 
     b = (x - tb->ang[itable]) * tb->invdelta;
diff --git a/src/MOLECULE/bond_gromos.cpp b/src/MOLECULE/bond_gromos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e409b7a72d686a7c573801a7bdaba608548ffd7
--- /dev/null
+++ b/src/MOLECULE/bond_gromos.cpp
@@ -0,0 +1,210 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "bond_gromos.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+BondGromos::BondGromos(LAMMPS *lmp) : Bond(lmp)  // forwards lmp to the Bond base class
+{
+  reinitflag = 1;  // not declared in BondGromos, so inherited; NOTE(review): confirm its meaning in Bond
+}
+
+/* ---------------------------------------------------------------------- */
+
+BondGromos::~BondGromos()
+{
+  // free the per-type arrays unless allocate() never ran or copymode is set
+  if (!allocated || copymode) return;
+  memory->destroy(k);
+  memory->destroy(r0);
+  memory->destroy(setflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondGromos::compute(int eflag, int vflag)
+{
+  // Bond energy is E = k (r^2 - r0^2)^2, so only the squared distance is needed;
+  // the force on atom ia is -4 k (r^2 - r0^2) times the separation vector.
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = 0;
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int **blist = neighbor->bondlist;
+  const int nbonds = neighbor->nbondlist;
+  const int nlocal = atom->nlocal;
+  const int newton_bond = force->newton_bond;
+
+  for (int ib = 0; ib < nbonds; ib++) {
+    const int ia = blist[ib][0];
+    const int ja = blist[ib][1];
+    const int btype = blist[ib][2];
+
+    // separation vector pointing from atom ja to atom ia
+
+    const double delx = pos[ia][0] - pos[ja][0];
+    const double dely = pos[ia][1] - pos[ja][1];
+    const double delz = pos[ia][2] - pos[ja][2];
+    const double rsq = delx*delx + dely*dely + delz*delz;
+
+    // dr is a difference of squared lengths; fbond multiplies the
+    // separation vector, ebond stays zero unless eflag is set
+
+    const double dr = rsq - r0[btype]*r0[btype];
+    const double kdr = k[btype]*dr;
+    const double fbond = -4.0 * kdr;
+    const double ebond = eflag ? kdr*dr : 0.0;
+
+    // accumulate on each atom if newton_bond is on or its index is below nlocal
+
+    if (newton_bond || ia < nlocal) {
+      frc[ia][0] += delx*fbond;
+      frc[ia][1] += dely*fbond;
+      frc[ia][2] += delz*fbond;
+    }
+    if (newton_bond || ja < nlocal) {
+      frc[ja][0] -= delx*fbond;
+      frc[ja][1] -= dely*fbond;
+      frc[ja][2] -= delz*fbond;
+    }
+
+    if (evflag) ev_tally(ia,ja,nlocal,newton_bond,ebond,fbond,delx,dely,delz);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondGromos::allocate()
+{
+  // arrays hold ntypes+1 entries; entries 1..ntypes are the ones used here
+  const int ntypes = atom->nbondtypes;
+  allocated = 1;
+
+  memory->create(k,ntypes+1,"bond:k");
+  memory->create(r0,ntypes+1,"bond:r0");
+  memory->create(setflag,ntypes+1,"bond:setflag");
+  for (int t = 1; t <= ntypes; ++t) setflag[t] = 0;  // no coeffs set yet
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+------------------------------------------------------------------------- */
+
+void BondGromos::coeff(int narg, char **arg)
+{
+  // expected arguments: arg[0] = bond type range, arg[1] = k, arg[2] = r0
+  if (narg != 3) error->all(FLERR,"Incorrect args for bond coefficients");
+  if (!allocated) allocate();
+
+  int first,last;
+  force->bounds(FLERR,arg[0],atom->nbondtypes,first,last);
+
+  const double kval = force->numeric(FLERR,arg[1]);
+  const double r0val = force->numeric(FLERR,arg[2]);
+
+  int nset = 0;
+  for (int t = first; t <= last; ++t, ++nset) {
+    k[t] = kval;
+    r0[t] = r0val;
+    setflag[t] = 1;
+  }
+  // an empty range means nothing was assigned
+  if (nset == 0) error->all(FLERR,"Incorrect args for bond coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   return an equilibrium bond length
+------------------------------------------------------------------------- */
+
+double BondGromos::equilibrium_distance(int i)
+{
+  return r0[i];  // stored r0 coefficient of bond type i
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes out coeffs to restart file
+------------------------------------------------------------------------- */
+
+void BondGromos::write_restart(FILE *fp)
+{
+  fwrite(&k[1],sizeof(double),atom->nbondtypes,fp);   // k[1..nbondtypes] as raw doubles
+  fwrite(&r0[1],sizeof(double),atom->nbondtypes,fp);  // followed by r0[1..nbondtypes]
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them
+------------------------------------------------------------------------- */
+
+void BondGromos::read_restart(FILE *fp)
+{
+  allocate();
+  const int ntypes = atom->nbondtypes;
+  // only rank 0 touches the file; the fread() results are not checked
+  if (comm->me == 0) {
+    fread(k+1,sizeof(double),ntypes,fp);
+    fread(r0+1,sizeof(double),ntypes,fp);
+  }
+  MPI_Bcast(k+1,ntypes,MPI_DOUBLE,0,world);
+  MPI_Bcast(r0+1,ntypes,MPI_DOUBLE,0,world);
+  for (int t = 1; t <= ntypes; ++t) setflag[t] = 1;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to data file
+------------------------------------------------------------------------- */
+
+void BondGromos::write_data(FILE *fp)
+{
+  // one text line per bond type: "<type> <k> <r0>"
+  for (int t = 1; t <= atom->nbondtypes; ++t) fprintf(fp,"%d %g %g\n",t,k[t],r0[t]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double BondGromos::single(int type, double rsq, int i, int j, double &fforce)
+{
+  // i and j are unused; dsq is the squared length minus r0 squared
+  const double dsq = rsq - r0[type]*r0[type];
+  fforce = -4.0*k[type] * dsq;
+  return k[type]*dsq*dsq;
+}
+
+/* ----------------------------------------------------------------------
+    Return ptr to internal members upon request.
+------------------------------------------------------------------------ */
+void *BondGromos::extract(char *str, int &dim)
+{
+  dim = 1;  // set even when the name is not recognized
+  // "kappa" -> k array, "r0" -> r0 array, anything else -> NULL
+  if (strcmp(str,"kappa") == 0) return (void *) k;
+  return (strcmp(str,"r0") == 0) ? (void *) r0 : NULL;
+}
diff --git a/src/MOLECULE/bond_gromos.h b/src/MOLECULE/bond_gromos.h
new file mode 100644
index 0000000000000000000000000000000000000000..dafe85e92b2e671427d3c1e8e8e68e6ab38b0202
--- /dev/null
+++ b/src/MOLECULE/bond_gromos.h
@@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(gromos,BondGromos)
+
+#else
+
+#ifndef LMP_BOND_GROMOS_H
+#define LMP_BOND_GROMOS_H
+
+#include <stdio.h>
+#include "bond.h"
+
+namespace LAMMPS_NS {
+
+class BondGromos : public Bond {
+ public:
+  BondGromos(class LAMMPS *);
+  virtual ~BondGromos();
+  virtual void compute(int, int);                  // per-bond forces, optional energy tally
+  void coeff(int, char **);                        // set k and r0 for a range of bond types
+  double equilibrium_distance(int);                // returns r0 of the given bond type
+  void write_restart(FILE *);                      // binary dump of k and r0
+  void read_restart(FILE *);                       // read on rank 0, then broadcast
+  void write_data(FILE *);                         // text lines "<type> <k> <r0>"
+  double single(int, double, int, int, double &);  // energy and force factor from a squared distance
+  virtual void *extract(char *, int &);            // "kappa" or "r0" -> pointer to that array
+
+ protected:
+  double *k,*r0;  // per-bond-type coefficient arrays; entries 1..nbondtypes are used
+
+  virtual void allocate();
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Incorrect args for bond coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+*/
diff --git a/src/MOLECULE/bond_table.cpp b/src/MOLECULE/bond_table.cpp
index 38cbe7e406823af76b118c6eb0529515e8d7971d..4f8db66757c5b71fa392e3c84ee5d5d6f76ec946 100644
--- a/src/MOLECULE/bond_table.cpp
+++ b/src/MOLECULE/bond_table.cpp
@@ -590,29 +590,29 @@ double BondTable::splint(double *xa, double *ya, double *y2a, int n, double x)
 
 void BondTable::uf_lookup(int type, double x, double &u, double &f)
 {
-  int itable;
+  if (!ISFINITE(x)) {
+    error->one(FLERR,"Illegal bond in bond style table");
+  }
+
   double fraction,a,b;
   char estr[128];
-
-  Table *tb = &tables[tabindex[type]];
-  if (x < tb->lo) {
+  const Table *tb = &tables[tabindex[type]];
+  const int itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
+  if (itable < 0) {
     sprintf(estr,"Bond length < table inner cutoff: "
             "type %d length %g",type,x);
     error->one(FLERR,estr);
-  }
-  if (x > tb->hi) {
+  } else if (itable >= tablength) {
     sprintf(estr,"Bond length > table outer cutoff: "
             "type %d length %g",type,x);
     error->one(FLERR,estr);
   }
 
   if (tabstyle == LINEAR) {
-    itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
     fraction = (x - tb->r[itable]) * tb->invdelta;
     u = tb->e[itable] + fraction*tb->de[itable];
     f = tb->f[itable] + fraction*tb->df[itable];
   } else if (tabstyle == SPLINE) {
-    itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
     fraction = (x - tb->r[itable]) * tb->invdelta;
 
     b = (x - tb->r[itable]) * tb->invdelta;
@@ -633,19 +633,28 @@ void BondTable::uf_lookup(int type, double x, double &u, double &f)
 
 void BondTable::u_lookup(int type, double x, double &u)
 {
-  int itable;
-  double fraction,a,b;
+  if (!ISFINITE(x)) {
+    error->one(FLERR,"Illegal bond in bond style table");
+  }
 
-  Table *tb = &tables[tabindex[type]];
-  x = MAX(x,tb->lo);
-  x = MIN(x,tb->hi);
+  double fraction,a,b;
+  char estr[128];
+  const Table *tb = &tables[tabindex[type]];
+  const int itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
+  if (itable < 0) {
+    sprintf(estr,"Bond length < table inner cutoff: "
+            "type %d length %g",type,x);
+    error->one(FLERR,estr);
+  } else if (itable >= tablength) {
+    sprintf(estr,"Bond length > table outer cutoff: "
+            "type %d length %g",type,x);
+    error->one(FLERR,estr);
+  }
 
   if (tabstyle == LINEAR) {
-    itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
     fraction = (x - tb->r[itable]) * tb->invdelta;
     u = tb->e[itable] + fraction*tb->de[itable];
   } else if (tabstyle == SPLINE) {
-    itable = static_cast<int> ((x - tb->lo) * tb->invdelta);
     fraction = (x - tb->r[itable]) * tb->invdelta;
 
     b = (x - tb->r[itable]) * tb->invdelta;
diff --git a/src/Makefile b/src/Makefile
index 243ac869e90a4de35c4cb701ad05fc792069649a..e0f0db77fe3d5eea0ed95b04541047344919ee45 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -63,7 +63,7 @@ PACKUSER = user-atc user-awpmd user-cgdna user-cgsdk user-colvars \
 	   user-mgpt user-misc user-molfile \
 	   user-netcdf user-omp user-phonon user-qmmm user-qtb \
 	   user-quip user-reaxc user-smd user-smtbq user-sph user-tally \
-	   user-vtk
+	   user-uef user-vtk
 
 PACKLIB = compress gpu kim kokkos latte meam mpiio mscg poems \
 	  python reax voronoi \
diff --git a/src/OPT/pair_lj_long_coul_long_opt.cpp b/src/OPT/pair_lj_long_coul_long_opt.cpp
index 9004e5c93cbe57007584de2105e6e29af2e65d0a..678d2d8bc404581f7ede6e3e78ddd341cf434485 100644
--- a/src/OPT/pair_lj_long_coul_long_opt.cpp
+++ b/src/OPT/pair_lj_long_coul_long_opt.cpp
@@ -726,7 +726,7 @@ void PairLJLongCoulLongOpt::eval_outer()
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
-  ineighn = (ineigh = listouter->ilist)+listouter->inum;
+  ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
@@ -735,7 +735,7 @@ void PairLJLongCoulLongOpt::eval_outer()
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
-    jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
diff --git a/src/Purge.list b/src/Purge.list
index 7ccdf3afd52ac354c65fc6638557aca7491eed00..312994fdb7cf413464728e45dfec655a7b6feb95 100644
--- a/src/Purge.list
+++ b/src/Purge.list
@@ -16,6 +16,9 @@ style_region.h
 style_neigh_bin.h
 style_neigh_pair.h
 style_neigh_stencil.h
+# deleted on 11 October 2017
+fix_shear_history_omp.cpp
+fix_shear_history_omp.h
 # deleted on 5 September 2017
 npair_halffull_newton_ssa.cpp
 npair_halffull_newton_ssa.h
diff --git a/src/REPLICA/prd.cpp b/src/REPLICA/prd.cpp
index 30ebc779c5e9a8a5c90b335e975c2c4cf05edd65..14eeac8d664551927278454f577de998a7bdc4a0 100644
--- a/src/REPLICA/prd.cpp
+++ b/src/REPLICA/prd.cpp
@@ -310,6 +310,7 @@ void PRD::command(int narg, char **arg)
   time_dephase = time_dynamics = time_quench = time_comm = time_output = 0.0;
   bigint clock = 0;
 
+  timer->init();
   timer->barrier_start();
   time_start = timer->get_wall(Timer::TOTAL);
 
diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp
index 5a4d88522488efcb62e4cff69ce482b1f610cf88..347cd3ba67c0927740086a879d2de5f750cf3faf 100644
--- a/src/REPLICA/tad.cpp
+++ b/src/REPLICA/tad.cpp
@@ -274,6 +274,7 @@ void TAD::command(int narg, char **arg)
   nbuild = ndanger = 0;
   time_neb = time_dynamics = time_quench = time_comm = time_output = 0.0;
 
+  timer->init();
   timer->barrier_start();
   time_start = timer->get_wall(Timer::TOTAL);
 
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index d2a770cc4712db6b48b2f18823a86a23e1b61f82..33a4b441fda4f9f3eefb3039db46c4ea2724a454 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -29,6 +29,8 @@
 #include "comm.h"
 #include "random_mars.h"
 #include "force.h"
+#include "input.h"
+#include "variable.h"
 #include "output.h"
 #include "math_const.h"
 #include "memory.h"
@@ -127,15 +129,60 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) :
   // nbody = # of non-zero ncount values
   // use nall as incremented ptr to set body[] values for each atom
 
-  } else if (strcmp(arg[3],"molecule") == 0) {
+  } else if (strcmp(arg[3],"molecule") == 0 || strcmp(arg[3],"custom") == 0) {
     rstyle = MOLECULE;
-    iarg = 4;
-    if (atom->molecule_flag == 0)
-      error->all(FLERR,"Fix rigid molecule requires atom attribute molecule");
-
+    tagint *molecule;
     int *mask = atom->mask;
-    tagint *molecule = atom->molecule;
     int nlocal = atom->nlocal;
+    int custom_flag = strcmp(arg[3],"custom") == 0;
+    if (custom_flag) {
+      if (narg < 5) error->all(FLERR,"Illegal fix rigid command");
+
+      // determine whether atom-style variable or atom property is used.
+      if (strstr(arg[4],"i_") == arg[4]) {
+        int is_double=0;
+        int custom_index = atom->find_custom(arg[4]+2,is_double);
+        if (custom_index == -1)
+          error->all(FLERR,"Fix rigid custom requires previously defined property/atom");
+        else if (is_double)
+          error->all(FLERR,"Fix rigid custom requires integer-valued property/atom");
+        int minval = INT_MAX;
+        int *value = atom->ivector[custom_index];
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit) minval = MIN(minval,value[i]);
+        int vmin = minval;
+        MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world);
+        molecule = new tagint[nlocal];
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit)
+            molecule[i] = (tagint)(value[i] - minval + 1);
+          else
+            molecule[i] = 0;
+
+      } else if (strstr(arg[4],"v_") == arg[4]) {
+        int ivariable = input->variable->find(arg[4]+2);
+        if (ivariable < 0)
+          error->all(FLERR,"Variable name for fix rigid custom does not exist");
+        if (input->variable->atomstyle(ivariable) == 0)
+          error->all(FLERR,"Fix rigid custom variable is no atom-style variable");
+        double *value = new double[nlocal];
+        input->variable->compute_atom(ivariable,0,value,1,0);
+        int minval = INT_MAX;
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit) minval = MIN(minval,(int)value[i]);
+        int vmin = minval;
+        MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world);
+        molecule = new tagint[nlocal];
+        for (i = 0; i < nlocal; i++)  // zero atoms outside the group, matching the i_ branch, so no entry stays uninitialized
+          molecule[i] = (mask[i] & groupbit) ? (tagint)((tagint)value[i] - minval + 1) : 0;
+        delete[] value;
+      } else error->all(FLERR,"Unsupported fix rigid custom property");
+    } else {
+      if (atom->molecule_flag == 0)
+        error->all(FLERR,"Fix rigid molecule requires atom attribute molecule");
+      molecule = atom->molecule;
+    }
+    iarg = 4 + custom_flag;
 
     tagint maxmol_tag = -1;
     for (i = 0; i < nlocal; i++)
@@ -174,6 +221,7 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) :
     }
 
     memory->destroy(ncount);
+    if (custom_flag) delete [] molecule;
 
   // each listed group is a rigid body
   // check if all listed groups exist
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 1404c3bf58da22f20739dbf521c8aa2adcfea723..e6083f64937e5077ffac90c88ebe5014b393629a 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -29,7 +29,9 @@
 #include "group.h"
 #include "comm.h"
 #include "force.h"
+#include "input.h"
 #include "output.h"
+#include "variable.h"
 #include "random_mars.h"
 #include "math_const.h"
 #include "memory.h"
@@ -64,11 +66,12 @@ enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
 
 FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg), step_respa(NULL), 
-  infile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL), 
-  xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), 
-  avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), 
-  itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), id_dilate(NULL), 
-  onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), idclose(NULL), rsqclose(NULL)
+  infile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL),
+  xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL),
+  avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL),
+  itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL),
+  id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL),
+  idclose(NULL), rsqclose(NULL)
 {
   int i;
 
@@ -89,7 +92,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
   // perform initial allocation of atom-based arrays
   // register with Atom class
 
-  extended = orientflag = dorientflag = 0;
+  extended = orientflag = dorientflag = customflag = 0;
   bodyown = NULL;
   bodytag = NULL;
   atom2body = NULL;
@@ -103,24 +106,71 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
 
   // parse args for rigid body specification
 
+  int *mask = atom->mask;
+  tagint *bodyid = NULL;
+  int nlocal = atom->nlocal;
+
   if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command");
-  if (strcmp(arg[3],"molecule") != 0)
-    error->all(FLERR,"Illegal fix rigid/small command");
+  if (strcmp(arg[3],"molecule") == 0) {
+    if (atom->molecule_flag == 0)
+      error->all(FLERR,"Fix rigid/small requires atom attribute molecule");
+    bodyid = atom->molecule;
+
+  } else if (strcmp(arg[3],"custom") == 0) {
+    if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command");
+      bodyid = new tagint[nlocal];
+      customflag = 1;
+
+      // determine whether atom-style variable or atom property is used.
+      if (strstr(arg[4],"i_") == arg[4]) {
+        int is_double=0;
+        int custom_index = atom->find_custom(arg[4]+2,is_double);
+        if (custom_index == -1)
+          error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom");
+        else if (is_double)
+          error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom");
+
+        int minval = INT_MAX;
+        int *value = atom->ivector[custom_index];
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit) minval = MIN(minval,value[i]);
+        int vmin = minval;
+        MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world);
+
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit) 
+            bodyid[i] = (tagint)(value[i] - minval + 1);
+          else bodyid[i] = 0;
+
+      } else if (strstr(arg[4],"v_") == arg[4]) {
+        int ivariable = input->variable->find(arg[4]+2);
+        if (ivariable < 0)
+          error->all(FLERR,"Variable name for fix rigid/small custom does not exist");
+        if (input->variable->atomstyle(ivariable) == 0)
+          error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable");
+        double *value = new double[nlocal];
+        input->variable->compute_atom(ivariable,0,value,1,0);
+        int minval = INT_MAX;
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit) minval = MIN(minval,(int)value[i]);
+        int vmin = minval;
+        MPI_Allreduce(&vmin,&minval,1,MPI_INT,MPI_MIN,world);
+
+        for (i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit)
+            bodyid[i] = (tagint)((tagint)value[i] - minval + 1);
+          else bodyid[i] = 0;  // index i, not 0: atoms outside the group get body ID 0
+        delete[] value;
+      } else error->all(FLERR,"Unsupported fix rigid custom property");
+  } else error->all(FLERR,"Illegal fix rigid/small command");
 
-  if (atom->molecule_flag == 0)
-    error->all(FLERR,"Fix rigid/small requires atom attribute molecule");
   if (atom->map_style == 0)
     error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify");
 
-  // maxmol = largest molecule #
-
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-  int nlocal = atom->nlocal;
-
+  // maxmol = largest bodyid #
   maxmol = -1;
   for (i = 0; i < nlocal; i++)
-    if (mask[i] & groupbit) maxmol = MAX(maxmol,molecule[i]);
+    if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]);
 
   tagint itmp;
   MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world);
@@ -155,6 +205,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
   }
 
   int iarg = 4;
+  if (customflag) ++iarg;
+
   while (iarg < narg) {
     if (strcmp(arg[iarg],"langevin") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix rigid/small command");
@@ -344,11 +396,12 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
   if (pcouple == XYZ || (domain->dimension == 2 && pcouple == XY)) pstyle = ISO;
   else pstyle = ANISO;
 
-  // create rigid bodies based on molecule ID
+  // create rigid bodies based on molecule or custom ID
   // sets bodytag for owned atoms
   // body attributes are computed later by setup_bodies()
 
-  create_bodies();
+  create_bodies(bodyid);
+  if (customflag) delete [] bodyid;
 
   // set nlocal_body and allocate bodies I own
 
@@ -1424,7 +1477,7 @@ void FixRigidSmall::set_v()
    set bodytag for all owned atoms
 ------------------------------------------------------------------------- */
 
-void FixRigidSmall::create_bodies()
+void FixRigidSmall::create_bodies(tagint *bodyid)
 {
   int i,m,n;
   double unwrap[3];
@@ -1464,8 +1517,8 @@ void FixRigidSmall::create_bodies()
   double *buf;
   memory->create(buf,ncount*percount,"rigid/small:buf");
 
-  // create map hash for storing unique molecule IDs of my atoms
-  // key = molecule ID
+  // create map hash for storing unique body IDs of my atoms
+  // key = body ID
   // value = index into per-body data structure
   // n = # of entries in hash
 
@@ -1477,12 +1530,10 @@ void FixRigidSmall::create_bodies()
   // value = index into N-length data structure
   // n = count of unique bodies my atoms are part of
 
-  tagint *molecule = atom->molecule;
-
   n = 0;
   for (i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
-    if (hash->find(molecule[i]) == hash->end()) (*hash)[molecule[i]] = n++;
+    if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++;
   }
 
   // bbox = bounding box of each rigid body my atoms are part of
@@ -1494,7 +1545,7 @@ void FixRigidSmall::create_bodies()
     bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG;
   }
 
-  // pack my atoms into buffer as molecule ID, unwrapped coords
+  // pack my atoms into buffer as body ID, unwrapped coords
 
   double **x = atom->x;
 
@@ -1502,7 +1553,7 @@ void FixRigidSmall::create_bodies()
   for (i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     domain->unmap(x[i],image[i],unwrap);
-    buf[m++] = molecule[i];
+    buf[m++] = bodyid[i];
     buf[m++] = unwrap[0];
     buf[m++] = unwrap[1];
     buf[m++] = unwrap[2];
@@ -1542,7 +1593,7 @@ void FixRigidSmall::create_bodies()
 
   for (i = 0; i < n; i++) rsqclose[i] = BIG;
 
-  // pack my atoms into buffer as molecule ID, atom ID, unwrapped coords
+  // pack my atoms into buffer as body ID, atom ID, unwrapped coords
 
   tagint *tag = atom->tag;
 
@@ -1550,7 +1601,7 @@ void FixRigidSmall::create_bodies()
   for (i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     domain->unmap(x[i],image[i],unwrap);
-    buf[m++] = molecule[i];
+    buf[m++] = bodyid[i];
     buf[m++] = ubuf(tag[i]).d;
     buf[m++] = unwrap[0];
     buf[m++] = unwrap[1];
@@ -1570,7 +1621,7 @@ void FixRigidSmall::create_bodies()
   for (i = 0; i < nlocal; i++) {
     bodytag[i] = 0;
     if (!(mask[i] & groupbit)) continue;
-    m = hash->find(molecule[i])->second;
+    m = hash->find(bodyid[i])->second;
     bodytag[i] = idclose[m];
     rsqmax = MAX(rsqmax,rsqclose[m]);
   }
diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h
index b07dea4f333ea3f27b7919fb74a4b2c52fc9585e..22f9b0c16c31e7ba8910a558dc1802fff6d4b128 100644
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@@ -79,6 +79,7 @@ class FixRigidSmall : public Fix {
   char *infile;             // file to read rigid body attributes from
   int setupflag;            // 1 if body properties are setup, else 0
   int commflag;             // various modes of forward/reverse comm
+  int customflag;           // 1 if custom property/variable define bodies
   int nbody;                // total # of rigid bodies
   int nlinear;              // total # of linear rigid bodies
   tagint maxmol;            // max mol-ID
@@ -187,7 +188,7 @@ class FixRigidSmall : public Fix {
   void image_shift();
   void set_xv();
   void set_v();
-  void create_bodies();
+  void create_bodies(tagint *);
   void setup_bodies_static();
   void setup_bodies_dynamic();
   void readfile(int, double **, int *);
diff --git a/src/USER-COLVARS/colvarproxy_lammps.cpp b/src/USER-COLVARS/colvarproxy_lammps.cpp
index 17dff305673456b1312ef2be36393f6c8e54281e..c5b9e5a60c860355a4a5960089b8f288b9566841 100644
--- a/src/USER-COLVARS/colvarproxy_lammps.cpp
+++ b/src/USER-COLVARS/colvarproxy_lammps.cpp
@@ -120,12 +120,6 @@ colvarproxy_lammps::colvarproxy_lammps(LAMMPS_NS::LAMMPS *lmp,
   if (restart_output_prefix_str.rfind(".*") != std::string::npos)
     restart_output_prefix_str.erase(restart_output_prefix_str.rfind(".*"),2);
 
-#if defined(_OPENMP)
-  if (smp_thread_id() == 0) {
-    omp_init_lock(&smp_lock_state);
-  }
-#endif
-
   // initialize multi-replica support, if available
   if (replica_enabled()) {
     MPI_Comm_rank(inter_comm, &inter_me);
@@ -143,7 +137,7 @@ void colvarproxy_lammps::init(const char *conf_file)
   colvars = new colvarmodule(this);
 
   cvm::log("Using LAMMPS interface, version "+
-            cvm::to_str(COLVARPROXY_VERSION)+".\n");
+           cvm::to_str(COLVARPROXY_VERSION)+".\n");
 
   my_angstrom  = _lmp->force->angstrom;
   my_boltzmann = _lmp->force->boltz;
@@ -155,7 +149,8 @@ void colvarproxy_lammps::init(const char *conf_file)
   colvars->setup_output();
 
   if (_lmp->update->ntimestep != 0) {
-    cvm::log("Initializing step number as firstTimestep.\n");
+    cvm::log("Setting initial step number from LAMMPS: "+
+             cvm::to_str(_lmp->update->ntimestep)+"\n");
     colvars->it = colvars->it_restart = _lmp->update->ntimestep;
   }
 
@@ -172,7 +167,6 @@ colvarproxy_lammps::~colvarproxy_lammps()
 {
   delete _random;
   if (colvars != NULL) {
-    colvars->write_output_files();
     delete colvars;
     colvars = NULL;
   }
@@ -188,10 +182,18 @@ int colvarproxy_lammps::setup()
 // trigger colvars computation
 double colvarproxy_lammps::compute()
 {
+  if (cvm::debug()) {
+    log(std::string(cvm::line_marker)+
+        "colvarproxy_lammps step no. "+
+        cvm::to_str(_lmp->update->ntimestep)+" [first - last = "+
+        cvm::to_str(_lmp->update->beginstep)+" - "+
+        cvm::to_str(_lmp->update->endstep)+"]\n");
+  }
+
   if (first_timestep) {
     first_timestep = false;
   } else {
-    // Use the time step number inherited from LAMMPS
+    // Use the time step number from LAMMPS Update object
     if ( _lmp->update->ntimestep - previous_step == 1 )
       colvars->it++;
     // Other cases could mean:
@@ -241,6 +243,13 @@ void colvarproxy_lammps::serialize_status(std::string &rst)
   rst = os.str();
 }
 
+void colvarproxy_lammps::write_output_files()  // writes the state file, then the module's own output files
+{
+  // TODO skip output if undefined
+  colvars->write_restart_file(cvm::output_prefix()+".colvars.state");  // NOTE(review): colvars is dereferenced without a NULL check
+  colvars->write_output_files();
+}
+
 // set status from string
 bool colvarproxy_lammps::deserialize_status(std::string &rst)
 {
@@ -331,89 +340,6 @@ int colvarproxy_lammps::backup_file(char const *filename)
 }
 
 
-#if defined(_OPENMP)
-
-
-// SMP support
-
-int colvarproxy_lammps::smp_enabled()
-{
-  if (b_smp_active) {
-    return COLVARS_OK;
-  }
-  return COLVARS_ERROR;
-}
-
-
-int colvarproxy_lammps::smp_colvars_loop()
-{
-  colvarmodule *cv = this->colvars;
-  colvarproxy_lammps *proxy = (colvarproxy_lammps *) cv->proxy;
-#pragma omp parallel for
-  for (size_t i = 0; i < cv->variables_active_smp()->size(); i++) {
-    colvar *x = (*(cv->variables_active_smp()))[i];
-    int x_item = (*(cv->variables_active_smp_items()))[i];
-    if (cvm::debug()) {
-      cvm::log("["+cvm::to_str(proxy->smp_thread_id())+"/"+cvm::to_str(proxy->smp_num_threads())+
-               "]: calc_colvars_items_smp(), i = "+cvm::to_str(i)+", cv = "+
-               x->name+", cvc = "+cvm::to_str(x_item)+"\n");
-    }
-    x->calc_cvcs(x_item, 1);
-  }
-  return cvm::get_error();
-}
-
-
-int colvarproxy_lammps::smp_biases_loop()
-{
-  colvarmodule *cv = this->colvars;
-#pragma omp parallel for
-  for (size_t i = 0; i < cv->biases_active()->size(); i++) {
-    colvarbias *b = (*(cv->biases_active()))[i];
-    if (cvm::debug()) {
-      cvm::log("Calculating bias \""+b->name+"\" on thread "+
-               cvm::to_str(smp_thread_id())+"\n");
-    }
-    b->update();
-  }
-  return cvm::get_error();
-}
-
-
-int colvarproxy_lammps::smp_thread_id()
-{
-  return omp_get_thread_num();
-}
-
-
-int colvarproxy_lammps::smp_num_threads()
-{
-  return omp_get_max_threads();
-}
-
-
-int colvarproxy_lammps::smp_lock()
-{
-  omp_set_lock(&smp_lock_state);
-  return COLVARS_OK;
-}
-
-
-int colvarproxy_lammps::smp_trylock()
-{
-  return omp_test_lock(&smp_lock_state) ? COLVARS_OK : COLVARS_ERROR;
-}
-
-
-int colvarproxy_lammps::smp_unlock()
-{
-  omp_unset_lock(&smp_lock_state);
-  return COLVARS_OK;
-}
-
-#endif
-
-
 // multi-replica support
 
 void colvarproxy_lammps::replica_comm_barrier() {
diff --git a/src/USER-COLVARS/colvarproxy_lammps.h b/src/USER-COLVARS/colvarproxy_lammps.h
index 6cdf0edfe8b97272aab39841d8229e8f08d37331..af2aa04dfc890a55443588435206279280581b6c 100644
--- a/src/USER-COLVARS/colvarproxy_lammps.h
+++ b/src/USER-COLVARS/colvarproxy_lammps.h
@@ -25,10 +25,6 @@
 #include <vector>
 #include <iostream>
 
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
 /* struct for packed data communication of coordinates and forces. */
 struct commdata {
   int tag,type;
@@ -91,7 +87,8 @@ class colvarproxy_lammps : public colvarproxy {
   // methods for lammps to move data or trigger actions in the proxy
  public:
   void set_temperature(double t) { t_target = t; };
-  bool total_forces_enabled() const { return  total_force_requested; };
+  bool total_forces_enabled() const { return total_force_requested; };
+  bool total_forces_same_step() const { return true; };
   bool want_exit() const { return do_exit; };
 
   // perform colvars computation. returns biasing energy
@@ -103,6 +100,10 @@ class colvarproxy_lammps : public colvarproxy {
   // set status from string
   bool deserialize_status(std::string &);
 
+  // Write files expected from Colvars (called by post_run())
+  void write_output_files();
+
+
   // implementation of pure methods from base class
  public:
 
@@ -140,21 +141,6 @@ class colvarproxy_lammps : public colvarproxy {
   // implementation of optional methods from base class
  public:
 
-#if defined(_OPENMP)
-  // SMP support
-  int smp_enabled();
-  int smp_colvars_loop();
-  int smp_biases_loop();
-  int smp_thread_id();
-  int smp_num_threads();
-protected:
-  omp_lock_t smp_lock_state;
-public:
-  int smp_lock();
-  int smp_trylock();
-  int smp_unlock();
-#endif
-
   // Multi-replica support
   // Indicate if multi-replica support is available and active
   virtual bool replica_enabled() { return (inter_comm != MPI_COMM_NULL); }
diff --git a/src/USER-COLVARS/colvarproxy_lammps_version.h b/src/USER-COLVARS/colvarproxy_lammps_version.h
index 0eb6f2d95ac6bbb4c98fe5c75fc2308a7758342c..45ecea867f17c7fdf2b5bbad780ac860287e8836 100644
--- a/src/USER-COLVARS/colvarproxy_lammps_version.h
+++ b/src/USER-COLVARS/colvarproxy_lammps_version.h
@@ -1,5 +1,5 @@
 #ifndef COLVARPROXY_VERSION
-#define COLVARPROXY_VERSION "2017-07-19"
+#define COLVARPROXY_VERSION "2017-10-20"
 // This file is part of the Collective Variables module (Colvars).
 // The original version of Colvars and its updates are located at:
 // https://github.com/colvars/colvars
diff --git a/src/USER-COLVARS/fix_colvars.cpp b/src/USER-COLVARS/fix_colvars.cpp
index 59e6c46b76113bd1594dcf2acb983ee75212b15b..956ba6498a518dacc28f4848788fd55427db839e 100644
--- a/src/USER-COLVARS/fix_colvars.cpp
+++ b/src/USER-COLVARS/fix_colvars.cpp
@@ -379,6 +379,7 @@ int FixColvars::setmask()
   mask |= POST_FORCE;
   mask |= POST_FORCE_RESPA;
   mask |= END_OF_STEP;
+  mask |= POST_RUN;
   return mask;
 }
 
@@ -913,6 +914,7 @@ void FixColvars::write_restart(FILE *fp)
   if (me == 0) {
     std::string rest_text("");
     proxy->serialize_status(rest_text);
+    // TODO call write_output_files()
     const char *cvm_state = rest_text.c_str();
     int len = strlen(cvm_state) + 1; // need to include terminating NULL byte.
     fwrite(&len,sizeof(int),1,fp);
@@ -934,6 +936,15 @@ void FixColvars::restart(char *buf)
 
 /* ---------------------------------------------------------------------- */
 
+void FixColvars::post_run()
+{
+  if (me == 0) { // NOTE(review): presumably me is the MPI rank, so only rank 0 writes -- confirm
+    proxy->write_output_files();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 double FixColvars::compute_scalar()
 {
   return energy;
diff --git a/src/USER-COLVARS/fix_colvars.h b/src/USER-COLVARS/fix_colvars.h
index c00b18aa4668b56ea8a1a82a729a0d02a9b0433a..509eca5de35892b3094cf5749b7d6d088d7b0d18 100644
--- a/src/USER-COLVARS/fix_colvars.h
+++ b/src/USER-COLVARS/fix_colvars.h
@@ -56,6 +56,7 @@ class FixColvars : public Fix {
   virtual void post_force(int);
   virtual void post_force_respa(int, int, int);
   virtual void end_of_step();
+  virtual void post_run();
   virtual double compute_scalar();
   virtual double memory_usage();
 
diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h
index 2a0175081ea02edab1647cdfaf210acc647300e0..12a572b94caab73125eb01887b89849189c11826 100644
--- a/src/USER-DPD/nbin_ssa.h
+++ b/src/USER-DPD/nbin_ssa.h
@@ -23,6 +23,8 @@ NBinStyle(ssa,
 #define LMP_NBIN_SSA_H
 
 #include "nbin_standard.h"
+#include "math.h"
+#include "error.h"
 
 namespace LAMMPS_NS {
 
@@ -47,79 +49,14 @@ class NBinSSA : public NBinStandard {
 
   bigint memory_usage();
 
-  inline
-  int coord2bin(const double & x,const double & y,const double & z) const
-  {
-    int ix,iy,iz;
-
-    if (x >= bboxhi_[0])
-      ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
-    else if (x >= bboxlo_[0]) {
-      ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
-      ix = MIN(ix,nbinx-1);
-    } else
-      ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
-
-    if (y >= bboxhi_[1])
-      iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
-    else if (y >= bboxlo_[1]) {
-      iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
-      iy = MIN(iy,nbiny-1);
-    } else
-      iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
-
-    if (z >= bboxhi_[2])
-      iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
-    else if (z >= bboxlo_[2]) {
-      iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
-      iz = MIN(iz,nbinz-1);
-    } else
-      iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
-
-    return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
-  }
-
-  inline
-  int coord2bin(const double & x,const double & y,const double & z, int* i) const
-  {
-    int ix,iy,iz;
-
-    if (x >= bboxhi_[0])
-      ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
-    else if (x >= bboxlo_[0]) {
-      ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
-      ix = MIN(ix,nbinx-1);
-    } else
-      ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
-
-    if (y >= bboxhi_[1])
-      iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
-    else if (y >= bboxlo_[1]) {
-      iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
-      iy = MIN(iy,nbiny-1);
-    } else
-      iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
-
-    if (z >= bboxhi_[2])
-      iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
-    else if (z >= bboxlo_[2]) {
-      iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
-      iz = MIN(iz,nbinz-1);
-    } else
-      iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
-
-    i[0] = ix - mbinxlo;
-    i[1] = iy - mbinylo;
-    i[2] = iz - mbinzlo;
-
-    return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
-  }
-
   inline
   int coord2bin(const double & x,const double & y,const double & z, int &ixo, int &iyo, int &izo) const
   {
     int ix,iy,iz;
 
+    if (!ISFINITE(x) || !ISFINITE(y) || !ISFINITE(z))
+      error->one(FLERR,"Non-numeric positions - simulation unstable");
+
     if (x >= bboxhi_[0])
       ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
     else if (x >= bboxlo_[0]) {
diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp
index ee9c0744d3d1d6bc32cee5561f63522228f89176..4163a816ac9bcd8662fd25facb1200019071cd76 100644
--- a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp
+++ b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp
@@ -378,19 +378,6 @@ void PairLJCutTholeLong::init_style()
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCutTholeLong::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.h b/src/USER-DRUDE/pair_lj_cut_thole_long.h
index 894042f6ce6bdad9219d44a174cd3f2fbeac3741..27a917c7375a5c58c49baf037c9bfb65bea5eb84 100644
--- a/src/USER-DRUDE/pair_lj_cut_thole_long.h
+++ b/src/USER-DRUDE/pair_lj_cut_thole_long.h
@@ -34,7 +34,6 @@ class PairLJCutTholeLong : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
index d8bfd698bebd48762793ce4d24285d17dd42263e..06285a58d3c3009e8ddb94fa7eb8402dc1ff8c3c 100644
--- a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
+++ b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
@@ -226,10 +226,10 @@ void PairLJCharmmCoulLongSoft::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -315,10 +315,10 @@ void PairLJCharmmCoulLongSoft::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -428,10 +428,10 @@ void PairLJCharmmCoulLongSoft::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -758,19 +758,6 @@ void PairLJCharmmCoulLongSoft::init_style()
   g_ewald = force->kspace->g_ewald;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCharmmCoulLongSoft::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/USER-FEP/pair_lj_charmm_coul_long_soft.h b/src/USER-FEP/pair_lj_charmm_coul_long_soft.h
index 7e52ec54b556eeef746f8949e1777b2e8f7d2f4e..252c9f66f591c19bf9efa6fbfbb74c05aa685a79 100644
--- a/src/USER-FEP/pair_lj_charmm_coul_long_soft.h
+++ b/src/USER-FEP/pair_lj_charmm_coul_long_soft.h
@@ -33,7 +33,6 @@ class PairLJCharmmCoulLongSoft : public Pair {
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
index f7c4084fe2471a6d809f4c2eba08eb390f2f0ae3..7be2ebabea1fc434f5db0e920adc9f26fa70a875 100644
--- a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
+++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
@@ -209,10 +209,10 @@ void PairLJCutCoulLongSoft::compute_inner()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -299,10 +299,10 @@ void PairLJCutCoulLongSoft::compute_middle()
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -403,10 +403,10 @@ void PairLJCutCoulLongSoft::compute_outer(int eflag, int vflag)
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -686,19 +686,6 @@ void PairLJCutCoulLongSoft::init_style()
   g_ewald = force->kspace->g_ewald;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCutCoulLongSoft::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.h b/src/USER-FEP/pair_lj_cut_coul_long_soft.h
index a03be3814add79367bff84fcf79236a7e1dadb5e..d49d1c8641816d8e3e0c13f9dce1d0dded07da4b 100644
--- a/src/USER-FEP/pair_lj_cut_coul_long_soft.h
+++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.h
@@ -32,7 +32,6 @@ class PairLJCutCoulLongSoft : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/USER-FEP/pair_lj_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_soft.cpp
index 8b6280a61aff7cb9b2fca5629e2438ee97985a01..9ae108fa338849a37704e58b66e039fff49d09ee 100644
--- a/src/USER-FEP/pair_lj_cut_soft.cpp
+++ b/src/USER-FEP/pair_lj_cut_soft.cpp
@@ -164,10 +164,10 @@ void PairLJCutSoft::compute_inner()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -242,10 +242,10 @@ void PairLJCutSoft::compute_middle()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -333,10 +333,10 @@ void PairLJCutSoft::compute_outer(int eflag, int vflag)
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -556,19 +556,6 @@ void PairLJCutSoft::init_style()
 
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCutSoft::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/USER-FEP/pair_lj_cut_soft.h b/src/USER-FEP/pair_lj_cut_soft.h
index 50ce685e5cc527f223761ede879e44620ef53853..46202d78a8fe60f38b2f9a0ec13ee6799e9bd4ef 100644
--- a/src/USER-FEP/pair_lj_cut_soft.h
+++ b/src/USER-FEP/pair_lj_cut_soft.h
@@ -32,7 +32,6 @@ class PairLJCutSoft : public Pair {
   virtual void settings(int, char **);
   void coeff(int, char **);
   virtual void init_style();
-  void init_list(int, class NeighList *);
   virtual double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/USER-INTEL/Install.sh b/src/USER-INTEL/Install.sh
index f7163e6791e7b56976d16d887d163b161bf5b209..da553d158ac24ff88ec9f86fd4e5b88ad7750191 100644
--- a/src/USER-INTEL/Install.sh
+++ b/src/USER-INTEL/Install.sh
@@ -46,7 +46,7 @@ action nbin_intel.h
 action nbin_intel.cpp
 action npair_intel.h
 action npair_intel.cpp
-action intel_simd.h pair_sw_intel.cpp
+action intel_simd.h
 action intel_intrinsics.h pair_tersoff_intel.cpp
 action intel_intrinsics_airebo.h pair_airebo_intel.cpp
 
diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README
index 3b8444605775e8525d3f6c65f24f355e865c8bb7..871d881f39f3649cd67bc641116b6dcfbe539160 100644
--- a/src/USER-INTEL/README
+++ b/src/USER-INTEL/README
@@ -30,28 +30,37 @@ be added or changed in the Makefile depending on the version:
 
 2017 update 2         - No changes needed
 2017 updates 3 or 4   - Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512
-2018 or newer         - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high 
+2018 initial release  - Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512
+2018u1 or newer       - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high 
 
 -----------------------------------------------------------------------------
 
 When using the suffix command with "intel", intel styles will be used if they
 exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP 
-USER-OMP styles will be used whenever USER-INTEL styles are not available. This
-allow for running most styles in LAMMPS with threading.
+package is installed, USER-OMP styles will be used whenever USER-INTEL styles
+are not available. This allows running most styles in LAMMPS with threading.
 
 -----------------------------------------------------------------------------
 
-The Long-Range Thread mode (LRT) in the Intel package currently uses
-pthreads by default. If pthreads are not supported in the build environment,
-the compile flag "-DLMP_INTEL_NOLRT" will disable the feature to allow for 
-builds without pthreads. Alternatively, "-DLMP_INTEL_LRT11" can be used to
-build with compilers that support threads using the C++11 standard. When using
+The Long-Range Thread mode (LRT) in the Intel package is enabled through the
+-DLMP_INTEL_USELRT define at compile time. All intel optimized makefiles
+include this define. This feature will use pthreads by default.
+Alternatively, "-DLMP_INTEL_LRT11" can be used to build with compilers that
+support threads intrinsically using the C++11 standard. When using
 LRT mode, you might need to disable OpenMP affinity settings (e.g.
 export KMP_AFFINITY=none). LAMMPS will generate a warning if the settings
 need to be changed.
 
 -----------------------------------------------------------------------------
 
+Unless Intel Math Kernel Library (MKL) is unavailable, -DLMP_USE_MKL_RNG
+should be added to the compile flags. This will enable using the MKL Mersenne
+Twister random number generator (RNG) for Dissipative Particle Dynamics 
+(DPD). This RNG can allow significantly faster performance and it also has a 
+significantly longer period than the standard RNG for DPD.
+
+-----------------------------------------------------------------------------
+
 In order to use offload to Intel(R) Xeon Phi(TM) coprocessors, the flag 
 -DLMP_INTEL_OFFLOAD should be set in the Makefile. Offload requires the use of 
 Intel compilers.
diff --git a/src/USER-INTEL/TEST/README b/src/USER-INTEL/TEST/README
index 434189dd263ecef43212a030f60889c4df0d998b..62602d592036e41353aacab21d7fef9d1aacacde 100644
--- a/src/USER-INTEL/TEST/README
+++ b/src/USER-INTEL/TEST/README
@@ -9,6 +9,7 @@
 # in.intel.tersoff -    Silicon benchmark with Tersoff
 # in.intel.water -      Coarse-grain water benchmark using Stillinger-Weber
 # in.intel.airebo -     Polyethelene benchmark with AIREBO
+# in.intel.dpd -        Dissipative Particle Dynamics
 #
 #############################################################################
 
@@ -16,16 +17,17 @@
 # Expected Timesteps/second with turbo on and HT enabled, LAMMPS June-2017
 #  - Compiled w/ Intel Parallel Studio 2017u2 and Makefile.intel_cpu_intelmpi
 #
-#                     Xeon E5-2697v4     Xeon Phi 7250
+#                     Xeon E5-2697v4     Xeon Phi 7250    Xeon Gold 6148
 #                    
-# in.intel.lj -            199.5               282.3
-# in.intel.rhodo -          12.4                17.5
-# in.intel.lc -	            19.0                25.7
-# in.intel.eam -            59.4                92.8
-# in.intel.sw -	           132.4               161.9
-# in.intel.tersoff -        83.3               101.1
-# in.intel.water -          53.4                90.3
-# in.intel.airebo -          7.3                11.8
+# in.intel.lj -            199.5               282.3            317.3
+# in.intel.rhodo -          12.4                17.5             24.4
+# in.intel.lc -	            19.0                25.7             26.8
+# in.intel.eam -            59.4                92.8            105.6 
+# in.intel.sw -	           132.4               161.9            213.8
+# in.intel.tersoff -        83.3               101.1            109.6
+# in.intel.water -          53.4                90.3            105.5
+# in.intel.airebo -          7.3                11.8             17.6
+# in.intel.dpd -            74.5               100.4            148.1
 #
 #############################################################################
 
diff --git a/src/USER-INTEL/TEST/in.intel.dpd b/src/USER-INTEL/TEST/in.intel.dpd
new file mode 100644
index 0000000000000000000000000000000000000000..e257d91f84b5cad67d0c78e4478ded62ccec4d80
--- /dev/null
+++ b/src/USER-INTEL/TEST/in.intel.dpd
@@ -0,0 +1,48 @@
+# DPD benchmark
+
+variable        N index on      # Newton Setting
+variable	w index 10	# Warmup Timesteps
+variable	t index 4000	# Main Run Timesteps
+variable	m index 1	# Main Run Timestep Multiplier
+variable	n index 0	# Use NUMA Mapping for Multi-Node
+variable	p index 0	# Use Power Measurement
+
+variable	x index 4
+variable	y index 2
+variable	z index 2
+
+variable	xx equal 20*$x
+variable	yy equal 20*$y
+variable	zz equal 20*$z
+variable	rr equal floor($t*$m)
+
+newton          $N
+if "$n > 0"	then "processors * * * grid numa"
+
+units		lj
+atom_style	atomic
+comm_modify     mode single vel yes
+
+lattice		fcc 3.0
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+velocity	all create 1.0 87287 loop geom
+
+pair_style	dpd 1.0 1.0 928948
+pair_coeff	1 1 25.0 4.5
+
+neighbor	0.5 bin
+neigh_modify    delay 0 every 1
+
+fix		1 all nve
+timestep	0.04
+
+thermo			1000
+
+if "$p > 0"		then "run_style verlet/power"
+
+if "$w > 0"		then "run $w"
+run    	 ${rr}
diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp
index d55afd47427302169ec649f8a12bc1a9e6012681..031c9642000c22a6aa7f4156db319de5409fce6e 100644
--- a/src/USER-INTEL/angle_charmm_intel.cpp
+++ b/src/USER-INTEL/angle_charmm_intel.cpp
@@ -336,7 +336,7 @@ void AngleCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nangletypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].theta0 = theta0[i];
     fc.fc[i].k_ub = k_ub[i];
diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp
index 47e0add6906836e8549472b9018dd1d6cf6fd487..84220277d7a736131c30fa37d79fd93364e0c4ca 100644
--- a/src/USER-INTEL/angle_harmonic_intel.cpp
+++ b/src/USER-INTEL/angle_harmonic_intel.cpp
@@ -318,7 +318,7 @@ void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nangletypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].theta0 = theta0[i];
   }
diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp
index bb96135b2d7abeaf1e9a2b7990fae1fb17a314b0..93d64ed631f5466baec735c52a2770160b2eb8fb 100644
--- a/src/USER-INTEL/bond_fene_intel.cpp
+++ b/src/USER-INTEL/bond_fene_intel.cpp
@@ -295,7 +295,7 @@ void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nbondtypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].ir0sq = 1.0 / (r0[i] * r0[i]);
     fc.fc[i].sigma = sigma[i];
diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp
index beb0ebcddaf52e1277859dad00fab1ce25a8aea3..0ac466f11386c99bef29b95a33e086349389239c 100644
--- a/src/USER-INTEL/bond_harmonic_intel.cpp
+++ b/src/USER-INTEL/bond_harmonic_intel.cpp
@@ -266,7 +266,7 @@ void BondHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nbondtypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].r0 = r0[i];
   }
diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp
index 715cef4d37c6a7b0f0e7afcfac57004ff02c9394..0e13e92251594d04348d98f2200c3501a5b29fd5 100644
--- a/src/USER-INTEL/dihedral_charmm_intel.cpp
+++ b/src/USER-INTEL/dihedral_charmm_intel.cpp
@@ -942,8 +942,8 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
   buffers->set_ntypes(tp1);
 
   if (weightflag) {
-    for (int i = 0; i < tp1; i++) {
-      for (int j = 0; j < tp1; j++) {
+    for (int i = 1; i < tp1; i++) {
+      for (int j = 1; j < tp1; j++) {
         fc.ljp[i][j].lj1 = lj14_1[i][j];
         fc.ljp[i][j].lj2 = lj14_2[i][j];
         fc.ljp[i][j].lj3 = lj14_3[i][j];
@@ -952,7 +952,7 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
     }
   }
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.bp[i].multiplicity = multiplicity[i];
     fc.bp[i].cos_shift = cos_shift[i];
     fc.bp[i].sin_shift = sin_shift[i];
diff --git a/src/USER-INTEL/dihedral_fourier_intel.cpp b/src/USER-INTEL/dihedral_fourier_intel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..805ffc0e256177f294bc8f2b118e778c4cd7054e
--- /dev/null
+++ b/src/USER-INTEL/dihedral_fourier_intel.cpp
@@ -0,0 +1,441 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include <mpi.h>
+#include <math.h>
+#include "dihedral_fourier_intel.h"
+#include "atom.h"
+#include "comm.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "pair.h"
+#include "update.h"
+#include "error.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+#define PTOLERANCE (flt_t)1.05   // eval() warns when c exceeds this
+#define MTOLERANCE (flt_t)-1.05  // eval() warns when c drops below this
+typedef struct { int a,b,c,d,t;  } int5_t;  // four atom indices + type
+
+/* ---------------------------------------------------------------------- */
+
+DihedralFourierIntel::DihedralFourierIntel(class LAMMPS *lmp)
+  : DihedralFourier(lmp)
+{
+  suffix_flag |= Suffix::INTEL;  // add the INTEL bit to the style's suffix flags
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralFourierIntel::compute(int eflag, int vflag)
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  // _use_base set: run the unmodified base-class kernel instead.
+  if (_use_base) { DihedralFourier::compute(eflag, vflag); return; }
+  #endif
+
+  // Dispatch on the fix's precision mode (mixed and single share float data).
+  if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
+    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
+                          force_const_single);
+  } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
+                           force_const_double);
+  } else {
+    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
+                         force_const_single);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void DihedralFourierIntel::compute(int eflag, int vflag,
+                                   IntelBuffers<flt_t,acc_t> *buffers,
+                                   const ForceConst<flt_t> &fc)
+{
+  // Tally setup happens only when energy or virial output is requested.
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = 0;
+
+  // Choose the eval<EFLAG,VFLAG,NEWTON_BOND> instantiation for these flags;
+  // the virial-only request is the one case that runs with EFLAG = 0, VFLAG = 1.
+  if (!evflag) {
+    if (force->newton_bond)
+      eval<0,0,1>(vflag, buffers, fc);
+    else
+      eval<0,0,0>(vflag, buffers, fc);
+  } else if (vflag && !eflag) {
+    if (force->newton_bond)
+      eval<0,1,1>(vflag, buffers, fc);
+    else
+      eval<0,1,0>(vflag, buffers, fc);
+  } else {
+    if (force->newton_bond)
+      eval<1,1,1>(vflag, buffers, fc);
+    else
+      eval<1,1,0>(vflag, buffers, fc);
+  }
+}
+
+template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
+void DihedralFourierIntel::eval(const int vflag,
+				IntelBuffers<flt_t,acc_t> *buffers,
+				const ForceConst<flt_t> &fc)
+// Fourier dihedral kernel: forces go to per-thread slices, tallies are reduced.
+{
+  const int inum = neighbor->ndihedrallist;
+  if (inum == 0) return;  // empty dihedral list: nothing to compute
+
+  ATOM_T * _noalias const x = buffers->get_x(0);
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+
+  int f_stride;  // number of FORCE_T entries in one thread's force slice
+  if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
+  else f_stride = buffers->get_stride(nlocal);
+
+  int tc;
+  FORCE_T * _noalias f_start;
+  acc_t * _noalias ev_global;
+  IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
+  const int nthreads = tc;
+
+  acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;  // reduced energy / virial
+  if (EFLAG) oedihedral = (acc_t)0.0;
+  if (VFLAG && vflag) {
+    ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
+  }
+
+  #if defined(_OPENMP)
+  #pragma omp parallel default(none) \
+    shared(f_start,f_stride,fc)           \
+    reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5)
+  #endif
+  {
+    int nfrom, npl, nto, tid;
+    #ifdef LMP_INTEL_USE_SIMDOFF
+    IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
+    #else
+    IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
+    #endif
+
+    FORCE_T * _noalias const f = f_start + (tid * f_stride);
+    if (fix->need_zero(tid))
+      memset(f, 0, f_stride * sizeof(FORCE_T));  // clear this thread's slice
+
+    const int5_t * _noalias const dihedrallist =
+      (int5_t *) neighbor->dihedrallist[0];
+
+    #ifdef LMP_INTEL_USE_SIMDOFF
+    acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;  // SIMD-loop partial sums
+    if (EFLAG) sedihedral = (acc_t)0.0;
+    if (VFLAG && vflag) {
+      sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
+    }
+    #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5)
+    for (int n = nfrom; n < nto; n ++) {
+    #else
+    for (int n = nfrom; n < nto; n += npl) {
+    #endif
+      const int i1 = dihedrallist[n].a;
+      const int i2 = dihedrallist[n].b;
+      const int i3 = dihedrallist[n].c;
+      const int i4 = dihedrallist[n].d;
+      const int type = dihedrallist[n].t;
+
+      // 1st bond: vb1 = x[i1] - x[i2]
+
+      const flt_t vb1x = x[i1].x - x[i2].x;
+      const flt_t vb1y = x[i1].y - x[i2].y;
+      const flt_t vb1z = x[i1].z - x[i2].z;
+
+      // 2nd bond, stored negated: vb2m = x[i2] - x[i3]
+
+      const flt_t vb2xm = x[i2].x - x[i3].x;
+      const flt_t vb2ym = x[i2].y - x[i3].y;
+      const flt_t vb2zm = x[i2].z - x[i3].z;
+
+      // 3rd bond: vb3 = x[i4] - x[i3]
+
+      const flt_t vb3x = x[i4].x - x[i3].x;
+      const flt_t vb3y = x[i4].y - x[i3].y;
+      const flt_t vb3z = x[i4].z - x[i3].z;
+
+      // c,s calculation: a = vb1 x vb2m, b = vb3 x vb2m; c = a.b/(|a||b|)
+
+      const flt_t ax = vb1y*vb2zm - vb1z*vb2ym;
+      const flt_t ay = vb1z*vb2xm - vb1x*vb2zm;
+      const flt_t az = vb1x*vb2ym - vb1y*vb2xm;
+      const flt_t bx = vb3y*vb2zm - vb3z*vb2ym;
+      const flt_t by = vb3z*vb2xm - vb3x*vb2zm;
+      const flt_t bz = vb3x*vb2ym - vb3y*vb2xm;
+
+      const flt_t rasq = ax*ax + ay*ay + az*az;
+      const flt_t rbsq = bx*bx + by*by + bz*bz;
+      const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+      const flt_t rg = sqrt(rgsq);
+
+      flt_t rginv, ra2inv, rb2inv;  // inverses stay 0 for degenerate geometry
+      rginv = ra2inv = rb2inv = (flt_t)0.0;
+      if (rg > 0) rginv = (flt_t)1.0/rg;
+      if (rasq > 0) ra2inv = (flt_t)1.0/rasq;
+      if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq;
+      const flt_t rabinv = sqrt(ra2inv*rb2inv);
+
+      flt_t c = (ax*bx + ay*by + az*bz)*rabinv;
+      const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+      // error check: warn if c lies outside [MTOLERANCE, PTOLERANCE]
+      #ifndef LMP_INTEL_USE_SIMDOFF
+      if (c > PTOLERANCE || c < MTOLERANCE) {
+        int me = comm->me;
+
+        if (screen) {
+          char str[128];
+          sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
+                  TAGINT_FORMAT " " TAGINT_FORMAT " "
+                  TAGINT_FORMAT " " TAGINT_FORMAT,
+                  me,tid,update->ntimestep,
+                  atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+          error->warning(FLERR,str,0);
+          fprintf(screen,"  1st atom: %d %g %g %g\n",
+                  me,x[i1].x,x[i1].y,x[i1].z);
+          fprintf(screen,"  2nd atom: %d %g %g %g\n",
+                  me,x[i2].x,x[i2].y,x[i2].z);
+          fprintf(screen,"  3rd atom: %d %g %g %g\n",
+                  me,x[i3].x,x[i3].y,x[i3].z);
+          fprintf(screen,"  4th atom: %d %g %g %g\n",
+                  me,x[i4].x,x[i4].y,x[i4].z);
+        }
+      }
+      #endif
+
+      if (c > (flt_t)1.0) c = (flt_t)1.0;  // clamp c into [-1, 1]
+      if (c < (flt_t)-1.0) c = (flt_t)-1.0;
+
+      flt_t deng;
+      flt_t df = (flt_t)0.0;
+      if (EFLAG) deng = (flt_t)0.0;
+      
+      for (int j = 0; j < nterms[type]; j++) {  // sum over this type's terms
+	const flt_t tcos_shift = fc.bp[j][type].cos_shift;
+	const flt_t tsin_shift = fc.bp[j][type].sin_shift;
+	const flt_t tk = fc.bp[j][type].k;
+	const int m = fc.bp[j][type].multiplicity;
+
+	flt_t p = (flt_t)1.0;
+	flt_t ddf1, df1;
+	ddf1 = df1 = (flt_t)0.0;
+
+	for (int i = 0; i < m; i++) {  // angle-addition recurrence, m steps
+	  ddf1 = p*c - df1*s;
+	  df1 = p*s + df1*c;
+	  p = ddf1;
+	}
+
+	p = p*tcos_shift + df1*tsin_shift;
+	df1 = df1*tcos_shift - ddf1*tsin_shift;
+	df1 *= -m;
+	p += (flt_t)1.0;
+	
+	if (m == 0) {  // zero multiplicity: constant energy, no force
+	  p = (flt_t)1.0 + tcos_shift;
+	  df1 = (flt_t)0.0;
+	}
+
+        if (EFLAG) deng += tk * p;
+        df -= tk * df1;
+      }
+
+      const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+      const flt_t hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+      const flt_t fga = fg*ra2inv*rginv;
+      const flt_t hgb = hg*rb2inv*rginv;
+      const flt_t gaa = -ra2inv*rg;
+      const flt_t gbb = rb2inv*rg;
+
+      const flt_t dtfx = gaa*ax;
+      const flt_t dtfy = gaa*ay;
+      const flt_t dtfz = gaa*az;
+      const flt_t dtgx = fga*ax - hgb*bx;
+      const flt_t dtgy = fga*ay - hgb*by;
+      const flt_t dtgz = fga*az - hgb*bz;
+      const flt_t dthx = gbb*bx;
+      const flt_t dthy = gbb*by;
+      const flt_t dthz = gbb*bz;
+
+      const flt_t sx2 = df*dtgx;
+      const flt_t sy2 = df*dtgy;
+      const flt_t sz2 = df*dtgz;
+
+      flt_t f1x = df*dtfx;  // force on atom i1
+      flt_t f1y = df*dtfy;
+      flt_t f1z = df*dtfz;
+
+      const flt_t f2x = sx2 - f1x;  // force on atom i2
+      const flt_t f2y = sy2 - f1y;
+      const flt_t f2z = sz2 - f1z;
+
+      flt_t f4x = df*dthx;  // force on atom i4
+      flt_t f4y = df*dthy;
+      flt_t f4z = df*dthz;
+
+      const flt_t f3x = -sx2 - f4x;  // force on atom i3
+      const flt_t f3y = -sy2 - f4y;
+      const flt_t f3z = -sz2 - f4z;
+
+      if (EFLAG || VFLAG) {
+        #ifdef LMP_INTEL_USE_SIMDOFF
+        IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4,
+                              f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+                              vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+                              vb3y, vb3z, sedihedral, f, NEWTON_BOND, nlocal,
+                              sv0, sv1, sv2, sv3, sv4, sv5);
+        #else
+        IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4,
+                              f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
+                              vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
+                              vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal,
+                              ov0, ov1, ov2, ov3, ov4, ov5);
+        #endif
+      }
+
+      #ifdef LMP_INTEL_USE_SIMDOFF
+      #pragma simdoff
+      #endif
+      {
+        if (NEWTON_BOND || i1 < nlocal) {  // without newton: local atoms only
+          f[i1].x += f1x;
+          f[i1].y += f1y;
+          f[i1].z += f1z;
+        }
+
+        if (NEWTON_BOND || i2 < nlocal) {
+          f[i2].x += f2x;
+          f[i2].y += f2y;
+          f[i2].z += f2z;
+        }
+
+        if (NEWTON_BOND || i3 < nlocal) {
+          f[i3].x += f3x;
+          f[i3].y += f3y;
+          f[i3].z += f3z;
+        }
+
+        if (NEWTON_BOND || i4 < nlocal) {
+          f[i4].x += f4x;
+          f[i4].y += f4y;
+          f[i4].z += f4z;
+        }
+      }
+    } // for n
+    #ifdef LMP_INTEL_USE_SIMDOFF
+    if (EFLAG) oedihedral += sedihedral;  // fold SIMD partials into reduction
+    if (VFLAG && vflag) {
+        ov0 += sv0; ov1 += sv1; ov2 += sv2;
+        ov3 += sv3; ov4 += sv4; ov5 += sv5;
+    }
+    #endif
+  } // omp parallel
+
+  if (EFLAG) energy += oedihedral;
+  if (VFLAG && vflag) {
+    virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
+    virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
+  }
+
+  fix->set_reduce_flag();  // NOTE(review): presumably requests the slice merge
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralFourierIntel::init_style()
+{
+  DihedralFourier::init_style();
+
+  // Locate the FixIntel instance registered under the id "package_intel".
+  const int intel_fix_index = modify->find_fix("package_intel");
+  if (intel_fix_index < 0) error->all(FLERR,
+               "The 'package intel' command is required for /intel styles");
+  fix = static_cast<FixIntel *>(modify->fix[intel_fix_index]);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  // A non-zero offload balance selects the base-class kernel; nothing to pack.
+  _use_base = (fix->offload_balance() != 0.0) ? 1 : 0;
+  if (_use_base) return;
+  #endif
+
+  fix->bond_init_check();
+
+  // Fill the coefficient table that matches the fix's precision mode.
+  if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
+    pack_force_const(force_const_single, fix->get_mixed_buffers());
+  } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+    pack_force_const(force_const_double, fix->get_double_buffers());
+  } else {
+    pack_force_const(force_const_single, fix->get_single_buffers());
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void DihedralFourierIntel::pack_force_const(ForceConst<flt_t> &fc,
+                                            IntelBuffers<flt_t,acc_t> *buffers)
+{
+  // Copy the per-type coefficient arrays into fc.bp, transposing the member
+  // arrays' [type][term] indexing to [term][type]; unset types are skipped.
+  const int ntypes_p1 = atom->ndihedraltypes + 1;
+  fc.set_ntypes(ntypes_p1, setflag, nterms, memory);
+  for (int itype = 1; itype < ntypes_p1; itype++) {
+    if (!setflag[itype]) continue;
+    for (int iterm = 0; iterm < nterms[itype]; iterm++) {
+      fc.bp[iterm][itype].cos_shift = cos_shift[itype][iterm];
+      fc.bp[iterm][itype].sin_shift = sin_shift[itype][iterm];
+      fc.bp[iterm][itype].k = k[itype][iterm];
+      fc.bp[iterm][itype].multiplicity = multiplicity[itype][iterm];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t>
+void DihedralFourierIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
+                                                         int *setflag,
+                                                         int *nterms,
+                                                         Memory *memory) {
+  // (Re)size bp[term][type]; nbondtypes == 0 only frees it. Adopt the
+  // allocator before use; NULL memory (destructor) keeps the previous one.
+  if (memory != NULL) _memory = memory;
+  int need = 1;  // rows required: largest nterms among the set types
+  for (int i = 1; i < nbondtypes; i++)  // callers pass ntypes + 1
+    if (setflag[i]) need = MAX(need, nterms[i]);
+  // Rebuild when the type count changes or more term rows are needed.
+  if (nbondtypes != _nbondtypes || (nbondtypes > 0 && need > _maxnterms)) {
+    if (_nbondtypes > 0) _memory->destroy(bp);
+    if (nbondtypes > 0)
+      _memory->create(bp, need, nbondtypes, "dihedralfourierintel.bp");
+    _maxnterms = need;
+  }
+  _nbondtypes = nbondtypes;
+}
diff --git a/src/USER-INTEL/dihedral_fourier_intel.h b/src/USER-INTEL/dihedral_fourier_intel.h
new file mode 100644
index 0000000000000000000000000000000000000000..a775e129f432a041b9687aaa4cf2c69cfd78f6c7
--- /dev/null
+++ b/src/USER-INTEL/dihedral_fourier_intel.h
@@ -0,0 +1,82 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(fourier/intel,DihedralFourierIntel)
+
+#else
+
+#ifndef LMP_DIHEDRAL_FOURIER_INTEL_H
+#define LMP_DIHEDRAL_FOURIER_INTEL_H
+
+#include "dihedral_fourier.h"
+#include "fix_intel.h"
+
+namespace LAMMPS_NS {
+
+class DihedralFourierIntel : public DihedralFourier {
+  // USER-INTEL variant of DihedralFourier, registered as fourier/intel.
+ public:
+  DihedralFourierIntel(class LAMMPS *lmp);
+  virtual void compute(int, int);
+  void init_style();
+
+ private:
+  FixIntel *fix;  // the "package_intel" fix, looked up in init_style()
+
+  template <class flt_t> class ForceConst;
+  template <class flt_t, class acc_t>
+  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
+               const ForceConst<flt_t> &fc);
+  template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
+  void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers,
+            const ForceConst<flt_t> &fc);
+  template <class flt_t, class acc_t>
+  void pack_force_const(ForceConst<flt_t> &fc,
+                        IntelBuffers<flt_t, acc_t> *buffers);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  int _use_base;  // non-zero: compute() calls DihedralFourier::compute()
+  #endif
+
+  template <class flt_t>
+  class ForceConst {
+   public:
+    typedef struct { flt_t cos_shift, sin_shift, k;
+      int multiplicity; } fc_packed1;
+    // Packed coefficients, indexed bp[term][type].
+    fc_packed1 **bp;
+
+    ForceConst() : bp(NULL), _nbondtypes(0), _maxnterms(0), _memory(NULL) {}
+    ~ForceConst() { set_ntypes(0, NULL, NULL, NULL); }
+    // Sizes bp for nbondtypes types; nbondtypes == 0 releases it.
+    void set_ntypes(const int nbondtypes, int *setflag, int *nterms,
+                    Memory *memory);
+
+   private:
+    int _nbondtypes, _maxnterms;
+    Memory *_memory;
+  };
+  ForceConst<float> force_const_single;    // used by mixed and single modes
+  ForceConst<double> force_const_double;   // used by double mode
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp b/src/USER-INTEL/dihedral_harmonic_intel.cpp
index 196b024fa62d9ed9e192ea9f4b7395b7173c206c..5d16b0da745425abd66617ab50043ef796d78db3 100644
--- a/src/USER-INTEL/dihedral_harmonic_intel.cpp
+++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp
@@ -400,7 +400,7 @@ void DihedralHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->ndihedraltypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.bp[i].multiplicity = multiplicity[i];
     fc.bp[i].cos_shift = cos_shift[i];
     fc.bp[i].sin_shift = sin_shift[i];
diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp
index 1abeba1d5ea5d71a6ed27033502c233a183c8a71..e290ab90616b94df0135264dcbbf66fe8546a36a 100644
--- a/src/USER-INTEL/dihedral_opls_intel.cpp
+++ b/src/USER-INTEL/dihedral_opls_intel.cpp
@@ -427,7 +427,7 @@ void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->ndihedraltypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.bp[i].k1 = k1[i];
     fc.bp[i].k2 = k2[i];
     fc.bp[i].k3 = k3[i];
diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp
index 637fc0d06e888c409733ee722437ef33ab286ac0..eac48b8510b4aa67a5e93f689ae1381510e02b57 100644
--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@@ -285,6 +285,7 @@ int FixIntel::setmask()
 {
   int mask = 0;
   mask |= PRE_REVERSE;
+  mask |= MIN_PRE_REVERSE;
   #ifdef _LMP_INTEL_OFFLOAD
   mask |= POST_FORCE;
   mask |= MIN_POST_FORCE;
diff --git a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h
index 068e5ed890354275a3b6b1b5835c530733711d4a..d7093e79bb68c91d785a66f257338bc6ccff81bf 100644
--- a/src/USER-INTEL/fix_intel.h
+++ b/src/USER-INTEL/fix_intel.h
@@ -43,6 +43,7 @@ class FixIntel : public Fix {
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
+  inline void min_setup(int in) { setup(in); }  // forwards to setup()
   void setup_pre_reverse(int eflag = 0, int vflag = 0);
 
   void pair_init_check(const bool cdmessage=false);
@@ -50,6 +51,8 @@ class FixIntel : public Fix {
   void kspace_init_check();
 
   void pre_reverse(int eflag = 0, int vflag = 0);
+  inline void min_pre_reverse(int eflag = 0, int vflag = 0)
+    { pre_reverse(eflag, vflag); }  // forwards to pre_reverse()
 
   // Get all forces, calculation results from coprocesser
   void sync_coprocessor();
diff --git a/src/USER-INTEL/improper_cvff_intel.cpp b/src/USER-INTEL/improper_cvff_intel.cpp
index dc9765d9130670ec7dd2e991dc8d936b66240142..39090e5a7420b3fd32598cbab2aca8a802e497b1 100644
--- a/src/USER-INTEL/improper_cvff_intel.cpp
+++ b/src/USER-INTEL/improper_cvff_intel.cpp
@@ -433,7 +433,7 @@ void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nimpropertypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].sign = sign[i];
     fc.fc[i].multiplicity = multiplicity[i];
diff --git a/src/USER-INTEL/improper_harmonic_intel.cpp b/src/USER-INTEL/improper_harmonic_intel.cpp
index fe0efca5ec163d50f4a75643223f6c8df0980a95..3547061672cff079b53761f928761e746d637d8a 100644
--- a/src/USER-INTEL/improper_harmonic_intel.cpp
+++ b/src/USER-INTEL/improper_harmonic_intel.cpp
@@ -389,7 +389,7 @@ void ImproperHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
   const int bp1 = atom->nimpropertypes + 1;
   fc.set_ntypes(bp1,memory);
 
-  for (int i = 0; i < bp1; i++) {
+  for (int i = 1; i < bp1; i++) {
     fc.fc[i].k = k[i];
     fc.fc[i].chi = chi[i];
   }
diff --git a/src/USER-INTEL/intel_buffers.cpp b/src/USER-INTEL/intel_buffers.cpp
index b4b664cb943354c3e38b97919fa4d2c5b53bb5aa..ac208f5a0ca7d934e65ac1a1a3decffab9614ffe 100644
--- a/src/USER-INTEL/intel_buffers.cpp
+++ b/src/USER-INTEL/intel_buffers.cpp
@@ -409,6 +409,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
   IP_PRE_get_stride(_ccache_stride3, nsize * 3, sizeof(acc_t), 0);
   lmp->memory->create(_ccachef, _ccache_stride3 * nt, "_ccachef");
   #endif
+  memset(_ccachei, 0, vsize * sizeof(int));
   memset(_ccachej, 0, vsize * sizeof(int));
 
   #ifdef _LMP_INTEL_OFFLOAD
@@ -425,7 +426,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
       #pragma offload_transfer target(mic:_cop) \
         nocopy(ccachex,ccachey:length(vsize) alloc_if(1) free_if(0)) \
         nocopy(ccachez,ccachew:length(vsize) alloc_if(1) free_if(0)) \
-        nocopy(ccachei:length(vsize) alloc_if(1) free_if(0)) \
+        in(ccachei:length(vsize) alloc_if(1) free_if(0)) \
         in(ccachej:length(vsize) alloc_if(1) free_if(0))
     }
     #ifdef LMP_USE_AVXCD
diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h
index a7663d54a68bebaebaeae1148690d550f2cf2c38..068f61023edfd0b08a7a6d983b2ce059bab1e4cc 100644
--- a/src/USER-INTEL/intel_preprocess.h
+++ b/src/USER-INTEL/intel_preprocess.h
@@ -211,10 +211,9 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
                            datasize);                           \
   }
 
-#define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum,          \
-                                nthreads, vecsize)              \
+#define IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthreads,	\
+                             vecsize)				\
   {                                                             \
-    tid = omp_get_thread_num();                                 \
     int idelta = static_cast<int>(ceil(static_cast<float>(inum) \
                                        /vecsize/nthreads));     \
     idelta *= vecsize;                                          \
@@ -223,6 +222,14 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
     if (ito > inum) ito = inum;                                 \
   }
 
+#define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum,          \
+                                nthreads, vecsize)              \
+  { /* set tid from OpenMP, then delegate the [ifrom,ito) split */ \
+    tid = omp_get_thread_num();                                 \
+    IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthreads,	\
+			 vecsize);				\
+  }
+
 #define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum,     \
                                  nthr, vecsize)                 \
   {                                                             \
@@ -235,13 +242,12 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
       int nd = nthr / INTEL_HTHREADS;                           \
       int td = tid / INTEL_HTHREADS;                            \
       int tm = tid % INTEL_HTHREADS;                            \
-      IP_PRE_omp_range_id_vec(ifrom, ito, td, inum, nd,         \
-        vecsize);                                               \
+      IP_PRE_omp_range_vec(ifrom, ito, td, inum, nd, vecsize);	\
       ifrom += tm * vecsize;                                    \
       ip = INTEL_HTHREADS * vecsize;                            \
     } else {                                                    \
-      IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, nthr,      \
-                              vecsize);                         \
+      IP_PRE_omp_range_vec(ifrom, ito, tid, inum, nthr,		\
+			   vecsize);				\
       ip = vecsize;                                             \
     }                                                           \
   }
@@ -292,6 +298,15 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
     ito = inum;                                                 \
   }
 
+#define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum,     \
+                                 nthr, vecsize)                 \
+  { /* one worker: tid 0 walks [0,inum) with unit stride */      \
+    tid = 0;							\
+    ifrom = 0;							\
+    ip = 1;							\
+    ito = inum;							\
+  }
+
 #endif
 
 #define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start,  \
diff --git a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
index 12101712f11ec833b6501b88959844ce9085854b..e6d45d7b2c6941fa87c24cd089ee46863b4828ee 100644
--- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
+++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp
@@ -319,7 +319,6 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
 	      const int bstart = binhead[ibin + binstart[k]];
 	      const int bend = binhead[ibin + binend[k]];
               #if defined(LMP_SIMD_COMPILER)
-              #pragma vector aligned
               #pragma simd
               #endif
               for (int jj = bstart; jj < bend; jj++)
@@ -341,7 +340,6 @@ void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
 	      const int bstart = binhead[ibin + stencil[k]];
 	      const int bend = binhead[ibin + stencil[k] + 1];
               #if defined(LMP_SIMD_COMPILER)
-              #pragma vector aligned
               #pragma simd
               #endif
               for (int jj = bstart; jj < bend; jj++)
diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp
index 79dc75366e80d73db0c70ef7cb46d45dce54c1f0..0068e02635c7d6d7bd145fe45a35bade50f64f0c 100644
--- a/src/USER-INTEL/npair_intel.cpp
+++ b/src/USER-INTEL/npair_intel.cpp
@@ -273,7 +273,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
             const int bstart = binhead[ibin + binstart[k]];
             const int bend = binhead[ibin + binend[k]];
             #if defined(LMP_SIMD_COMPILER)
-            #pragma vector aligned
             #pragma simd
             #endif
             for (int jj = bstart; jj < bend; jj++)
@@ -307,7 +306,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
             const int bstart = binhead[ibin];
             const int bend = binhead[ibin + 1];
             #if defined(LMP_SIMD_COMPILER)
-            #pragma vector aligned
             #pragma simd
             #endif
             for (int jj = bstart; jj < bend; jj++) {
diff --git a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp
index 07beae1e41683d781b2f80df9dbe2e1d790c66cb..e32fd066607fff224be9bcf2d606c4472864220e 100644
--- a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp
+++ b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp
@@ -458,8 +458,8 @@ void PairBuckCoulCutIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.c_cut[i][j].cutsq = cutsq[i][j];
       fc.c_cut[i][j].cut_ljsq = cut_ljsq[i][j];
       fc.c_cut[i][j].cut_coulsq = cut_coulsq[i][j];
diff --git a/src/USER-INTEL/pair_buck_coul_long_intel.cpp b/src/USER-INTEL/pair_buck_coul_long_intel.cpp
index 995e2e85839d3503fdefd8cc9100a7d49a78f19c..2b9fea7a50f80c0298069d976c3211d47bffc12e 100644
--- a/src/USER-INTEL/pair_buck_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_buck_coul_long_intel.cpp
@@ -545,8 +545,8 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       if (cutsq[i][j] < cut_ljsq[i][j])
         error->all(FLERR,
          "Intel variant of lj/buck/coul/long expects lj cutoff<=coulombic");
diff --git a/src/USER-INTEL/pair_buck_intel.cpp b/src/USER-INTEL/pair_buck_intel.cpp
index 8c63d2e62d5d9a419ec627d9da01bbf64cfd77a3..05a28eb7f05b71e3df46b4403ae6971b70351295 100644
--- a/src/USER-INTEL/pair_buck_intel.cpp
+++ b/src/USER-INTEL/pair_buck_intel.cpp
@@ -413,8 +413,8 @@ void PairBuckIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.c_force[i][j].buck1 = buck1[i][j];
       fc.c_force[i][j].buck2 = buck2[i][j];
       fc.c_force[i][j].rhoinv = rhoinv[i][j];
diff --git a/src/USER-INTEL/pair_dpd_intel.cpp b/src/USER-INTEL/pair_dpd_intel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..09f27504a1bd2a8bc94bb0a6e4f50d5f775fe129
--- /dev/null
+++ b/src/USER-INTEL/pair_dpd_intel.cpp
@@ -0,0 +1,617 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+                        Shun Xu (Computer Network Information Center, CAS)
+------------------------------------------------------------------------- */
+
+#include <math.h>
+#include "pair_dpd_intel.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+#define LMP_MKL_RNG VSL_BRNG_MT19937
+#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
+#define IEPSILON 1.0e10
+
+/* ---------------------------------------------------------------------- */
+
+PairDPDIntel::PairDPDIntel(LAMMPS *lmp) :
+  PairDPD(lmp), fix(NULL), _cop(-1), _onetype(0), _nrandom_thread(0),
+  random_thread(NULL)
+{
+  // _cop is only ever read (by pack_force_const), so it must start defined.
+  suffix_flag |= Suffix::INTEL;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairDPDIntel::~PairDPDIntel()
+{
+  // extra per-thread generators are only created in OpenMP builds
+  #if defined(_OPENMP)
+  #ifdef LMP_USE_MKL_RNG
+  for (int t = 0; t < _nrandom_thread; ++t)
+    vslDeleteStream(&random_thread[t]);
+  #else
+  // slot 0 aliases the inherited generator (see settings()), so skip it
+  for (int t = 1; t < _nrandom_thread; ++t)
+    delete random_thread[t];
+  #endif
+  #endif
+  delete [] random_thread;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairDPDIntel::compute(int eflag, int vflag)
+{
+  const int prec = fix->precision();  // pick buffers/constants per precision
+  if (prec == FixIntel::PREC_MODE_DOUBLE)
+    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
+                           force_const_double);
+  else if (prec == FixIntel::PREC_MODE_MIXED)
+    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
+                          force_const_single);
+  else
+    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
+                         force_const_single);
+  fix->balance_stamp();
+  vflag_fdotr = 0;  // always cleared once the templated compute returns
+}
+
+template <class flt_t, class acc_t>
+void PairDPDIntel::compute(int eflag, int vflag,
+                           IntelBuffers<flt_t,acc_t> *buffers,
+                           const ForceConst<flt_t> &fc)
+{
+  // Precision-specific driver: set up tallies, pack atoms, then run eval().
+  if (eflag || vflag) ev_setup(eflag, vflag);
+  else evflag = vflag_fdotr = 0;
+
+  const int inum = list->inum;
+  const int nthreads = comm->nthreads;
+  const int host_start = fix->host_start_pair();
+  const int offload_end = fix->offload_end_pair();
+  const int ago = neighbor->ago;
+
+  // Pack atoms (skipped when ago == 0 or when separate buffers are in use).
+  if (ago != 0 && fix->separate_buffers() == 0) {
+    fix->start_watch(TIME_PACK);
+    // use all threads only when more than INTEL_HTHREADS are available
+    const int packthreads = (nthreads > INTEL_HTHREADS) ? nthreads : 1;
+    #if defined(_OPENMP)
+    #pragma omp parallel if(packthreads > 1)
+    #endif
+    {
+      int ifrom, ito, tid;
+      IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
+                                packthreads, sizeof(ATOM_T));
+      buffers->thr_pack(ifrom,ito,ago);
+    }
+    fix->stop_watch(TIME_PACK);
+  }
+
+  // virial mode for eval(): 2 = vflag_fdotr set, 1 = other virial, 0 = none
+  const int ovflag = vflag_fdotr ? 2 : (vflag ? 1 : 0);
+  // eval<ONETYPE,EFLAG,NEWTON_PAIR>: the first call covers the offload range
+  // [0, offload_end), the second the host range [host_start, inum).
+  if (force->newton_pair) {
+    if (eflag) {
+      if (_onetype) {
+        eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum);
+      } else {
+        eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum);
+      }
+    } else {
+      if (_onetype) {
+        eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum);
+      } else {
+        eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum);
+      }
+    }
+  } else {
+    if (eflag) {
+      if (_onetype) {
+        eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum);
+      } else {
+        eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum);
+      }
+    } else {
+      if (_onetype) {
+        eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum);
+      } else {
+        eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end);
+        eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum);
+      }
+    }
+  }
+}
+
+template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
+void PairDPDIntel::eval(const int offload, const int vflag,
+                        IntelBuffers<flt_t,acc_t> *buffers,
+                        const ForceConst<flt_t> &fc,
+                        const int astart, const int aend)
+{ // DPD pair forces (plus optional energy/virial) for atoms [astart, aend)
+  const int inum = aend - astart;
+  if (inum == 0) return; // empty range: nothing to compute or tally
+  int nlocal, nall, minlocal;
+  fix->get_buffern(offload, nlocal, nall, minlocal);
+
+  const int ago = neighbor->ago;
+  IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
+
+  ATOM_T * _noalias const x = buffers->get_x(offload);
+  typedef struct { double x, y, z; } lmp_vt;
+  lmp_vt *v = (lmp_vt *)atom->v[0]; // atom->v[0] viewed as {x,y,z} doubles
+  const flt_t dtinvsqrt = 1.0/sqrt(update->dt); // scales the random term
+
+  const int * _noalias const numneigh = list->numneigh;
+  const int * _noalias const cnumneigh = buffers->cnumneigh(list);
+  const int * _noalias const firstneigh = buffers->firstneigh(list);
+  const FC_PACKED1_T * _noalias const param = fc.param[0];
+  const flt_t * _noalias const special_lj = fc.special_lj;
+  int * _noalias const rngi_thread = fc.rngi;
+  const int rng_size = buffers->get_max_nbors(); // refill threshold below
+
+  const int ntypes = atom->ntypes + 1; // row stride used to index param
+  const int eatom = this->eflag_atom;
+
+  // Determine how much data to transfer
+  int x_size, q_size, f_stride, ev_size, separate_flag;
+  IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag,
+                       buffers, offload, fix, separate_flag,
+                       x_size, q_size, ev_size, f_stride);
+
+  int tc;
+  FORCE_T * _noalias f_start;
+  acc_t * _noalias ev_global;
+  IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
+  const int nthreads = tc;
+  int *overflow = fix->get_off_overflow_flag();
+  {
+    #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
+    *timer_compute = MIC_Wtime();
+    #endif
+
+    IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
+                              f_stride, x, 0);
+
+    acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
+    if (EFLAG) oevdwl = (acc_t)0;
+    if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
+
+    // loop over neighbors of my atoms
+    #if defined(_OPENMP)
+    #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
+    #endif
+    {
+      int iifrom, iip, iito, tid;
+      IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads);
+      iifrom += astart;
+      iito += astart;
+
+      #ifdef LMP_USE_MKL_RNG
+      VSLStreamStatePtr *my_random = &(random_thread[tid]);
+      #else
+      RanMars *my_random = random_thread[tid];
+      #endif
+      flt_t *my_rand_buffer = fc.rand_buffer_thread[tid];
+      int rngi = rngi_thread[tid]; // buffer entries this thread has consumed
+
+      int foff;
+      if (NEWTON_PAIR) foff = tid * f_stride - minlocal;
+      else foff = -minlocal;
+      FORCE_T * _noalias const f = f_start + foff;
+      if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
+
+      flt_t icut, a0, gamma, sigma;
+      if (ONETYPE) { // flat index 3 is [1][1] of a 2x2 table, if contiguous
+        icut = param[3].icut;
+        a0 = param[3].a0;
+        gamma = param[3].gamma;
+        sigma = param[3].sigma;
+      }
+      for (int i = iifrom; i < iito; i += iip) {
+        int itype, ptr_off;
+        const FC_PACKED1_T * _noalias parami;
+        if (!ONETYPE) {
+          itype = x[i].w;
+          ptr_off = itype * ntypes;
+          parami = param + ptr_off;
+        }
+
+        const int * _noalias const jlist = firstneigh + cnumneigh[i];
+        const int jnum = numneigh[i];
+
+        acc_t fxtmp, fytmp, fztmp, fwtmp;
+        acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
+
+        const flt_t xtmp = x[i].x;
+        const flt_t ytmp = x[i].y;
+        const flt_t ztmp = x[i].z;
+	const flt_t vxtmp = v[i].x;
+	const flt_t vytmp = v[i].y;
+	const flt_t vztmp = v[i].z;
+        fxtmp = fytmp = fztmp = (acc_t)0;
+        if (EFLAG) fwtmp = sevdwl = (acc_t)0;
+        if (NEWTON_PAIR == 0)
+          if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
+
+	if (rngi + jnum > rng_size) { // regenerate the rngi consumed entries
+          #ifdef LMP_USE_MKL_RNG
+	  if (sizeof(flt_t) == sizeof(float))
+	    vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, *my_random, rngi, 
+			  (float*)my_rand_buffer, (float)0.0, (float)1.0 );
+	  else
+	    vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, *my_random, rngi, 
+	  		  (double*)my_rand_buffer, 0.0, 1.0 );
+          #else
+          for (int jj = 0; jj < rngi; jj++)
+            my_rand_buffer[jj] = my_random->gaussian();
+          #endif
+	  rngi = 0;
+	}
+
+        #if defined(LMP_SIMD_COMPILER)
+	#pragma vector aligned
+	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
+	                         sv0, sv1, sv2, sv3, sv4, sv5)
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          flt_t forcelj, evdwl;
+          forcelj = evdwl = (flt_t)0.0;
+
+          int j, jtype, sbindex;
+          if (!ONETYPE) {
+            sbindex = jlist[jj] >> SBBITS & 3;
+            j = jlist[jj] & NEIGHMASK;
+          } else
+            j = jlist[jj];
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          if (!ONETYPE) {
+            jtype = x[j].w;
+            icut = parami[jtype].icut;
+          }
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+	  const flt_t rinv = (flt_t)1.0/sqrt(rsq);
+
+          if (rinv > icut) { // icut holds 1/cutoff, so this is r < cutoff
+            flt_t factor_dpd;
+            if (!ONETYPE) factor_dpd = special_lj[sbindex];
+
+	    flt_t delvx = vxtmp - v[j].x;
+	    flt_t delvy = vytmp - v[j].y;
+	    flt_t delvz = vztmp - v[j].z;
+	    flt_t dot = delx*delvx + dely*delvy + delz*delvz;
+	    flt_t randnum = my_rand_buffer[jj]; // NOTE(review): not rngi + jj - confirm intended
+
+	    flt_t iwd = rinv - icut;
+	    if (rinv > (flt_t)IEPSILON) iwd = (flt_t)0.0; // r ~ 0: drop the pair
+
+	    if (!ONETYPE) {
+	      a0 = parami[jtype].a0;
+	      gamma = parami[jtype].gamma;
+	      sigma = parami[jtype].sigma;
+	    }
+	    flt_t fpair = a0 - iwd * gamma * dot + sigma * randnum * dtinvsqrt;
+	    if (!ONETYPE) fpair *= factor_dpd;
+	    fpair *= iwd;
+
+            const flt_t fpx = fpair * delx;
+            fxtmp += fpx;
+            if (NEWTON_PAIR) f[j].x -= fpx;
+            const flt_t fpy = fpair * dely;
+            fytmp += fpy;
+            if (NEWTON_PAIR) f[j].y -= fpy;
+            const flt_t fpz = fpair * delz;
+            fztmp += fpz;
+            if (NEWTON_PAIR) f[j].z -= fpz;
+
+            if (EFLAG) {
+	      flt_t cut = (flt_t)1.0/icut;
+	      flt_t r = (flt_t)1.0/rinv;
+	      evdwl = (flt_t)0.5 * a0 * (cut - (flt_t)2.0*r + rsq * icut);
+	      if (!ONETYPE) evdwl *= factor_dpd;
+              sevdwl += evdwl;
+              if (eatom) {
+                fwtmp += (flt_t)0.5 * evdwl;
+                if (NEWTON_PAIR)
+                  f[j].w += (flt_t)0.5 * evdwl;
+              }
+            }
+
+            if (NEWTON_PAIR == 0)
+              IP_PRE_ev_tally_nborv(vflag, delx, dely, delz, fpx, fpy, fpz);
+          } // if rinv > icut
+        } // for jj
+        if (NEWTON_PAIR) {
+          f[i].x += fxtmp;
+          f[i].y += fytmp;
+          f[i].z += fztmp;
+        } else {
+          f[i].x = fxtmp;
+          f[i].y = fytmp;
+          f[i].z = fztmp;
+        }
+
+        IP_PRE_ev_tally_atom(NEWTON_PAIR, EFLAG, vflag, f, fwtmp);
+	rngi += jnum; // advance the consumed count by this atom's neighbors
+      } // for ii
+
+      IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start,
+                              f_stride, x, offload, vflag, ov0, ov1, ov2, ov3,
+                              ov4, ov5);
+      rngi_thread[tid] = rngi; // persist the count for the next call
+    } // end omp
+
+    IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
+                        ov0, ov1, ov2, ov3, ov4, ov5);
+
+    if (EFLAG) {
+      if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; // presumably full list: pairs seen twice
+      ev_global[0] = oevdwl;
+      ev_global[1] = (acc_t)0.0;
+    }
+    if (vflag) {
+      if (NEWTON_PAIR == 0) {
+        ov0 *= (acc_t)0.5;
+        ov1 *= (acc_t)0.5;
+        ov2 *= (acc_t)0.5;
+        ov3 *= (acc_t)0.5;
+        ov4 *= (acc_t)0.5;
+        ov5 *= (acc_t)0.5;
+      }
+      ev_global[2] = ov0;
+      ev_global[3] = ov1;
+      ev_global[4] = ov2;
+      ev_global[5] = ov3;
+      ev_global[6] = ov4;
+      ev_global[7] = ov5;
+    }
+    #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
+    *timer_compute = MIC_Wtime() - *timer_compute;
+    #endif
+  } // end offload
+
+  if (offload)
+    fix->stop_watch(TIME_OFFLOAD_LATENCY);
+  else
+    fix->stop_watch(TIME_HOST_PAIR);
+
+  if (EFLAG || vflag)
+    fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
+  else
+    fix->add_result_array(f_start, 0, offload);
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   ------------------------------------------------------------------------- */
+
+void PairDPDIntel::settings(int narg, char **arg) {
+  #if defined(_OPENMP)
+  if (_nrandom_thread) {
+    #ifdef LMP_USE_MKL_RNG
+    for (int i = 0; i < _nrandom_thread; i++)
+      vslDeleteStream(&random_thread[i]);
+    #else
+    for (int i = 1; i < _nrandom_thread; i++)
+      delete random_thread[i];
+    #endif
+  }
+  #endif
+  delete []random_thread;  // allocated in every build, so free it in every build
+  random_thread = NULL;    // stay destructible if the base call errors out
+  _nrandom_thread = 0;
+  PairDPD::settings(narg,arg);
+  _nrandom_thread = comm->nthreads;
+  // one generator per thread, seeded with seed + rank + nprocs * thread id
+  #ifdef LMP_USE_MKL_RNG
+  random_thread=new VSLStreamStatePtr[comm->nthreads];
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    vslNewStream(&random_thread[tid], LMP_MKL_RNG,
+                 seed + comm->me + comm->nprocs * tid );
+  }
+  #endif
+
+  #else
+  random_thread =new RanMars*[comm->nthreads];
+  random_thread[0] = random;  // thread 0 shares the inherited generator
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    if (tid > 0)
+      random_thread[tid] = new RanMars(lmp, seed+comm->me+comm->nprocs*tid);
+  }
+  #endif
+
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairDPDIntel::init_style()
+{
+  PairDPD::init_style();
+  // tag the last neighbor request as intel; full list when newton_pair is off
+  NeighRequest *req = neighbor->requests[neighbor->nrequest-1];
+  req->intel = 1;
+  if (force->newton_pair == 0) {
+    req->half = 0;
+    req->full = 1;
+  }
+  const int ifix = modify->find_fix("package_intel");
+  if (ifix < 0)
+    error->all(FLERR,
+               "The 'package intel' command is required for /intel styles");
+  fix = static_cast<FixIntel *>(modify->fix[ifix]);
+  fix->pair_init_check();
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (fix->offload_balance() != 0.0)
+    error->all(FLERR,
+          "Offload for dpd/intel is not yet available. Set balance to 0.");
+  #endif
+
+  if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    pack_force_const(force_const_double, fix->get_double_buffers());
+  else if (fix->precision() == FixIntel::PREC_MODE_MIXED)
+    pack_force_const(force_const_single, fix->get_mixed_buffers());
+  else
+    pack_force_const(force_const_single, fix->get_single_buffers());
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void PairDPDIntel::pack_force_const(ForceConst<flt_t> &fc,
+                                    IntelBuffers<flt_t,acc_t> *buffers)
+{
+  // Copies the DPD coefficients into fc (precision flt_t) and refreshes the
+  // cutoff tables. One atom type with a non-molecular system selects the
+  // ONETYPE variants of eval().
+  _onetype = (atom->ntypes == 1 && !atom->molecular) ? 1 : 0;
+
+  const int tp1 = atom->ntypes + 1;
+  fc.set_ntypes(tp1,comm->nthreads,buffers->get_max_nbors(),memory,_cop);
+  buffers->set_ntypes(tp1);
+  flt_t **cutneighsq = buffers->get_cutneighsq();
+
+  // Repeat cutsq calculation because done after call to init_style
+  for (int it = 1; it <= atom->ntypes; it++) {
+    for (int jt = it; jt <= atom->ntypes; jt++) {
+      const bool pair_set = setflag[it][jt] != 0 ||
+        (setflag[it][it] != 0 && setflag[jt][jt] != 0);
+      const double cut = init_one(it,jt);
+      if (pair_set) {
+        const double cutneigh = cut + neighbor->skin;
+        cutsq[it][jt] = cutsq[jt][it] = cut*cut;
+        cutneighsq[it][jt] = cutneighsq[jt][it] = cutneigh * cutneigh;
+      }
+      // eval() tests distances against the inverse cutoff
+      fc.param[it][jt].icut = fc.param[jt][it].icut = 1.0 / cut;
+    }
+  }
+
+  // entry 0 is pinned to 1.0; the remaining factors are copied from force
+  for (int k = 1; k < 4; k++)
+    fc.special_lj[k] = force->special_lj[k];
+  fc.special_lj[0] = 1.0;
+
+  // index 0 is skipped: the type loops here all start at 1
+  for (int it = 1; it < tp1; it++) {
+    for (int jt = 1; jt < tp1; jt++) {
+      fc.param[it][jt].a0 = a0[it][jt];
+      fc.param[it][jt].gamma = gamma[it][jt];
+      fc.param[it][jt].sigma = sigma[it][jt];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t>
+void PairDPDIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
+                                                 const int nthreads,
+                                                 const int max_nbors,
+                                                 Memory *memory,
+                                                 const int cop) {
+  if (ntypes != _ntypes) {  // (re)allocate only when the type count changes
+    if (_ntypes > 0) {      // release the previous tables via their allocator
+      _memory->destroy(param);
+      _memory->destroy(rand_buffer_thread);
+      _memory->destroy(rngi);
+    }
+    if (ntypes > 0) {
+      _cop = cop;
+      memory->create(param,ntypes,ntypes,"fc.param");
+      memory->create(rand_buffer_thread, nthreads, max_nbors,
+                     "fc.rand_buffer_thread");
+      memory->create(rngi,nthreads,"fc.rngi");  // own label for diagnostics
+      for (int i = 0; i < nthreads; i++) rngi[i] = max_nbors;  // start "used up"
+    }
+  }
+  _ntypes = ntypes;
+  _memory = memory;  // remembered for the destroy calls of a later resize
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   ------------------------------------------------------------------------- */
+
+void PairDPDIntel::read_restart_settings(FILE *fp)
+{
+  #if defined(_OPENMP)
+  if (_nrandom_thread) {
+    #ifdef LMP_USE_MKL_RNG
+    for (int i = 0; i < _nrandom_thread; i++)
+      vslDeleteStream(&random_thread[i]);
+    #else
+    for (int i = 1; i < _nrandom_thread; i++)
+      delete random_thread[i];
+    #endif
+  }
+  #endif
+  delete []random_thread;  // allocated in every build, so free it in every build
+  random_thread = NULL;    // stay destructible if the base call errors out
+  _nrandom_thread = 0;
+  PairDPD::read_restart_settings(fp);
+  _nrandom_thread = comm->nthreads;
+  // one generator per thread, seeded with seed + rank + nprocs * thread id
+  #ifdef LMP_USE_MKL_RNG
+  random_thread=new VSLStreamStatePtr[comm->nthreads];
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    vslNewStream(&random_thread[tid], LMP_MKL_RNG,
+                 seed + comm->me + comm->nprocs * tid );
+  }
+  #endif
+
+  #else
+  random_thread =new RanMars*[comm->nthreads];
+  random_thread[0] = random;  // thread 0 shares the inherited generator
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  {
+    int tid = omp_get_thread_num();
+    if (tid > 0)
+      random_thread[tid] = new RanMars(lmp, seed+comm->me+comm->nprocs*tid);
+  }
+  #endif
+
+  #endif
+}
diff --git a/src/USER-INTEL/pair_dpd_intel.h b/src/USER-INTEL/pair_dpd_intel.h
new file mode 100644
index 0000000000000000000000000000000000000000..416d873c0009ef9864e4beb0cb3fd10b84fcc4a0
--- /dev/null
+++ b/src/USER-INTEL/pair_dpd_intel.h
@@ -0,0 +1,110 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+                        Shun Xu (Computer Network Information Center, CAS)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(dpd/intel,PairDPDIntel)
+
+#else
+
+#ifndef LMP_PAIR_DPD_INTEL_H
+#define LMP_PAIR_DPD_INTEL_H
+
+#include "pair_dpd.h"
+#include "fix_intel.h"
+
+#ifdef LMP_USE_MKL_RNG
+#include "mkl_vsl.h"
+#else
+#include "random_mars.h"
+#endif
+
+namespace LAMMPS_NS {
+
+class PairDPDIntel : public PairDPD {
+
+ public:
+  PairDPDIntel(class LAMMPS *);
+  ~PairDPDIntel();
+
+  virtual void compute(int, int);
+  void settings(int, char **);
+  void init_style();
+  void read_restart_settings(FILE *);
+
+ private:
+  FixIntel *fix;                        // "package_intel" fix, set in init_style()
+  int _cop, _onetype, _nrandom_thread;  // _nrandom_thread: size of random_thread
+
+  #ifdef LMP_USE_MKL_RNG
+  VSLStreamStatePtr *random_thread;     // one MKL VSL stream per thread
+  #else
+  RanMars **random_thread;              // one RanMars generator per thread
+  #endif
+
+  template <class flt_t> class ForceConst;
+  template <class flt_t, class acc_t>
+  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
+               const ForceConst<flt_t> &fc);
+  template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
+  void eval(const int offload, const int vflag,
+            IntelBuffers<flt_t,acc_t> * buffers,
+            const ForceConst<flt_t> &fc, const int astart, const int aend);
+
+  template <class flt_t, class acc_t>
+  void pack_force_const(ForceConst<flt_t> &fc,
+                        IntelBuffers<flt_t, acc_t> *buffers);
+
+  // ----------------------------------------------------------------------
+  // Per-precision copy of the pair coefficients plus per-thread RNG buffers
+  template <class flt_t>
+  class ForceConst {
+   public:
+    typedef struct { flt_t icut, a0, gamma, sigma; } fc_packed1;  // icut = 1/cut
+
+    _alignvar(flt_t special_lj[4],64);
+    fc_packed1 **param;
+    flt_t **rand_buffer_thread;  // [thread][max_nbors] Gaussian random numbers
+    int *rngi;                   // per-thread count of consumed buffer entries
+
+    ForceConst() : _ntypes(0), _cop(-1), _memory(NULL) {}  // dtor reads _cop
+    ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }
+
+    void set_ntypes(const int ntypes, const int nthreads, const int max_nbors,
+                    Memory *memory, const int cop);
+
+   private:
+    int _ntypes, _cop;
+    Memory *_memory;
+  };
+  ForceConst<float> force_const_single;
+  ForceConst<double> force_const_double;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: The 'package intel' command is required for /intel styles
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp
index 3fbb58308b0d37bfeea2f918cf44de0e7e67a8e5..1f05ad0efc82c917f847a26e4e009c90f1643a2d 100644
--- a/src/USER-INTEL/pair_gayberne_intel.cpp
+++ b/src/USER-INTEL/pair_gayberne_intel.cpp
@@ -938,8 +938,8 @@ void PairGayBerneIntel::pack_force_const(ForceConst<flt_t> &fc,
   fc.upsilon = upsilon;
   fc.mu = mu;
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.ijc[i][j].lj1 = lj1[i][j];
       fc.ijc[i][j].lj2 = lj2[i][j];
       fc.ijc[i][j].cutsq = cutsq[i][j];
diff --git a/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp
index 0dc2c275e87d3d583da1b42a4a493ae04cdd4469..e3afcd64a6e30f04506698b112dc9194606294a3 100644
--- a/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp
+++ b/src/USER-INTEL/pair_lj_charmm_coul_charmm_intel.cpp
@@ -521,8 +521,8 @@ void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.lj[i][j].x = lj1[i][j];
       fc.lj[i][j].y = lj2[i][j];
       fc.lj[i][j].z = lj3[i][j];
diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
index fe99525122c17ae456241ae8f33e6f2deb1bf82a..a2680cdff41bb964cf39e42f85c140837e08fca8 100644
--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
@@ -586,8 +586,8 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.lj[i][j].x = lj1[i][j];
       fc.lj[i][j].y = lj2[i][j];
       fc.lj[i][j].z = lj3[i][j];
diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
index e9775d6ec5c96b97bcd89ae39925796474bdca86..c0bf6f35c6e6b220f33d6d6bbb00232d8fcce3bd 100644
--- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
@@ -544,8 +544,8 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       if (cutsq[i][j] < cut_ljsq[i][j])
         error->all(FLERR,
          "Intel variant of lj/cut/coul/long expects lj cutoff<=coulombic");
diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp
index 4871821842dd327e8d2da1e1a0bb02a8e4146ae9..f5a7999ee2e9cf13280167af8eecb008dbca59fe 100644
--- a/src/USER-INTEL/pair_lj_cut_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_intel.cpp
@@ -437,8 +437,8 @@ void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc,
     fc.special_lj[0] = 1.0;
   }
 
-  for (int i = 0; i < tp1; i++) {
-    for (int j = 0; j < tp1; j++) {
+  for (int i = 1; i < tp1; i++) {
+    for (int j = 1; j < tp1; j++) {
       fc.ljc12o[i][j].lj1 = lj1[i][j];
       fc.ljc12o[i][j].lj2 = lj2[i][j];
       fc.lj34[i][j].lj3 = lj3[i][j];
diff --git a/src/USER-INTEL/pppm_intel.cpp b/src/USER-INTEL/pppm_intel.cpp
index f1cfe591f2a1365ffc8e2f3b68c6cad1dc5cf0bf..db855b75ef8f877303ae6186e12fc59314729397 100644
--- a/src/USER-INTEL/pppm_intel.cpp
+++ b/src/USER-INTEL/pppm_intel.cpp
@@ -66,11 +66,7 @@ PPPMIntel::PPPMIntel(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
   rho_lookup = drho_lookup = NULL;
   rho_points = 0;
 
-  vdxy_brick = vdz0_brick = NULL;
-  work3 = NULL;
-  cg_pack = NULL;
-
-  _use_table = _use_packing = _use_lrt = 0;
+  _use_table = _use_lrt = 0;
 }
 
 PPPMIntel::~PPPMIntel()
@@ -82,12 +78,6 @@ PPPMIntel::~PPPMIntel()
 
   memory->destroy(rho_lookup);
   memory->destroy(drho_lookup);
-
-  memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out);
-  memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out);
-  memory->destroy(work3);
-
-  delete cg_pack;
 }
 
 /* ----------------------------------------------------------------------
@@ -141,36 +131,6 @@ void PPPMIntel::init()
   if (order > INTEL_P3M_MAXORDER)
     error->all(FLERR,"PPPM order greater than supported by USER-INTEL\n");
 
-  _use_packing = (order == 7) && (INTEL_VECTOR_WIDTH == 16)
-                              && (sizeof(FFT_SCALAR) == sizeof(float))
-                              && (differentiation_flag == 0);
-  if (_use_packing) {
-    memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out);
-    create3d_offset(vdxy_brick, nzlo_out, nzhi_out+2,
-		    nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1,
-		    "pppmintel:vdxy_brick");
-    memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out);
-    create3d_offset(vdz0_brick, nzlo_out, nzhi_out+2,
-		    nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1,
-		    "pppmintel:vdz0_brick");
-    memory->destroy(work3);
-    memory->create(work3, 2*nfft_both, "pppmintel:work3");
-
-    // new communicator for the double-size bricks
-    delete cg_pack;
-    int (*procneigh)[2] = comm->procneigh;
-    cg_pack = new GridComm(lmp,world,2,0, 2*nxlo_in,2*nxhi_in+1,nylo_in,
-                           nyhi_in,nzlo_in,nzhi_in, 2*nxlo_out,2*nxhi_out+1,
-                           nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
-    cg_pack->ghost_notify();
-    cg_pack->setup();
-  }
 }
 
 /* ----------------------------------------------------------------------
@@ -272,18 +232,13 @@ void PPPMIntel::compute_first(int eflag, int vflag)
   // also performs per-atom calculations via poisson_peratom()
 
   if (differentiation_flag == 1) poisson_ad();
-  else poisson_ik_intel();
+  else poisson_ik();
 
   // all procs communicate E-field values
   // to fill ghost cells surrounding their 3d bricks
 
   if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
-  else {
-    if (_use_packing)
-      cg_pack->forward_comm(this,FORWARD_IK);
-    else
-      cg->forward_comm(this,FORWARD_IK);
-  }
+  else cg->forward_comm(this,FORWARD_IK);
 
   // extra per-atom energy/virial communication
 
@@ -604,7 +559,7 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
    interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
 
-template<class flt_t, class acc_t, int use_table, int use_packing>
+template<class flt_t, class acc_t, int use_table>
 void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
 {
   // loop over my charges, interpolate electric field from nearby grid points
@@ -649,9 +604,9 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
       int ny = part2grid[i][1];
       int nz = part2grid[i][2];
 
-      int nxsum = (use_packing ? 2 : 1) * (nx + nlower);
+      int nxsum = nx + nlower;
       int nysum = ny + nlower;
-      int nzsum = nz + nlower;;
+      int nzsum = nz + nlower;
 
       FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi;
       FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi;
@@ -668,12 +623,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
         #pragma simd
         #endif
         for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
-          if (use_packing) {
-            rho0[2 * k] = rho_lookup[idx][k];
-            rho0[2 * k + 1] = rho_lookup[idx][k];
-          } else {
-            rho0[k] = rho_lookup[idx][k];
-          }
+	  rho0[k] = rho_lookup[idx][k];
           rho1[k] = rho_lookup[idy][k];
           rho2[k] = rho_lookup[idz][k];
         }
@@ -690,12 +640,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
             r2 = rho_coeff[l][k] + r2*dy;
             r3 = rho_coeff[l][k] + r3*dz;
           }
-          if (use_packing) {
-            rho0[2 * (k-nlower)] = r1;
-            rho0[2 * (k-nlower) + 1] = r1;
-          } else {
-            rho0[k-nlower] = r1;
-          }
+	  rho0[k-nlower] = r1;
           rho1[k-nlower] = r2;
           rho2[k-nlower] = r3;
         }
@@ -722,18 +667,12 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
           #if defined(LMP_SIMD_COMPILER)
           #pragma simd
           #endif
-          for (int l = 0; l < (use_packing ? 2 : 1) *
-                 INTEL_P3M_ALIGNED_MAXORDER; l++) {
+          for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) {
             int mx = l+nxsum;
             FFT_SCALAR x0 = y0*rho0[l];
-            if (use_packing) {
-              ekxy_arr[l] -= x0*vdxy_brick[mz][my][mx];
-              ekz0_arr[l] -= x0*vdz0_brick[mz][my][mx];
-            } else {
-              ekx_arr[l] -= x0*vdx_brick[mz][my][mx];
-              eky_arr[l] -= x0*vdy_brick[mz][my][mx];
-              ekz_arr[l] -= x0*vdz_brick[mz][my][mx];
-            }
+	    ekx_arr[l] -= x0*vdx_brick[mz][my][mx];
+	    eky_arr[l] -= x0*vdy_brick[mz][my][mx];
+	    ekz_arr[l] -= x0*vdz_brick[mz][my][mx];
           }
         }
       }
@@ -741,18 +680,10 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
       FFT_SCALAR ekx, eky, ekz;
       ekx = eky = ekz = ZEROF;
 
-      if (use_packing) {
-        for (int l = 0; l < 2*order; l += 2) {
-          ekx += ekxy_arr[l];
-          eky += ekxy_arr[l+1];
-          ekz += ekz0_arr[l];
-        }
-      } else {
-        for (int l = 0; l < order; l++) {
-          ekx += ekx_arr[l];
-          eky += eky_arr[l];
-          ekz += ekz_arr[l];
-        }
+      for (int l = 0; l < order; l++) {
+	ekx += ekx_arr[l];
+	eky += eky_arr[l];
+	ekz += ekz_arr[l];
       }
 
       // convert E-field to force
@@ -965,137 +896,6 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
   }
 }
 
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ik
-   Does special things for packing mode to avoid repeated copies
-------------------------------------------------------------------------- */
-
-void PPPMIntel::poisson_ik_intel()
-{
-  if (_use_packing == 0) {
-    poisson_ik();
-    return;
-  }
-
-  int i,j,k,n;
-  double eng;
-
-  // transform charge density (r -> k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] = density_fft[i];
-    work1[n++] = ZEROF;
-  }
-
-  fft1->compute(work1,work1,1);
-
-  // global energy and virial contribution
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        eng = s2 * greensfn[i] * (work1[n]*work1[n] +
-                                  work1[n+1]*work1[n+1]);
-        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
-        if (eflag_global) energy += eng;
-        n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        energy +=
-          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] *= scaleinv * greensfn[i];
-    work1[n++] *= scaleinv * greensfn[i];
-  }
-
-  // extra FFTs for per-atom energy/virial
-
-  if (evflag_atom) poisson_peratom();
-
-  // triclinic system
-
-  if (triclinic) {
-    poisson_ik_triclinic();
-    return;
-  }
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkx[i]*work1[n+1];
-        work2[n+1] = -fkx[i]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  // y direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work3[n] = fky[j]*work1[n+1];
-        work3[n+1] = -fky[j]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work3,work3,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdxy_brick[k][j][2*i] = work2[n];
-        vdxy_brick[k][j][2*i+1] = work3[n];
-        n += 2;
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkz[k]*work1[n+1];
-        work2[n+1] = -fkz[k]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdz0_brick[k][j][2*i] = work2[n];
-        vdz0_brick[k][j][2*i+1] = 0.;
-        n += 2;
-      }
-}
-
 /* ----------------------------------------------------------------------
    precompute rho coefficients as a lookup table to save time in make_rho
    and fieldforce.  Instead of doing this polynomial for every atom 6 times
@@ -1141,46 +941,6 @@ void PPPMIntel::precompute_rho()
   }
 }
 
-/* ----------------------------------------------------------------------
-   pack own values to buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPMIntel::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  if ((flag == FORWARD_IK) && _use_packing) {
-    FFT_SCALAR *xsrc = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out];
-    FFT_SCALAR *zsrc = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = xsrc[list[i]];
-      buf[n++] = zsrc[list[i]];
-    }
-  } else {
-    PPPM::pack_forward(flag, buf, nlist, list);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   unpack another proc's own values from buf and set own ghost values
-------------------------------------------------------------------------- */
-
-void PPPMIntel::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  if ((flag == FORWARD_IK) && _use_packing) {
-    FFT_SCALAR *xdest = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out];
-    FFT_SCALAR *zdest = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      xdest[list[i]] = buf[n++];
-      zdest[list[i]] = buf[n++];
-    }
-  } else {
-    PPPM::unpack_forward(flag, buf, nlist, list);
-  }
-}
-
 /* ----------------------------------------------------------------------
    memory usage of local arrays
 ------------------------------------------------------------------------- */
@@ -1201,14 +961,6 @@ double PPPMIntel::memory_usage()
       bytes += rho_points * INTEL_P3M_ALIGNED_MAXORDER * sizeof(FFT_SCALAR);
     }
   }
-  if (_use_packing) {
-    bytes += 2 * (nzhi_out + 2 - nzlo_out + 1) * (nyhi_out - nylo_out + 1)
-               * (2 * nxhi_out + 1 - 2 * nxlo_out + 1) * sizeof(FFT_SCALAR);
-    bytes -= 3 * (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1)
-               * (nzhi_out - nzlo_out + 1) * sizeof(FFT_SCALAR);
-    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
-    bytes += cg_pack->memory_usage();
-  }
   return bytes;
 }
 
diff --git a/src/USER-INTEL/pppm_intel.h b/src/USER-INTEL/pppm_intel.h
index 5bffabe0e5f2e3ed07e22aa436ffa706d8dae9da..2a57372558ee7d13dc43717596794e0dc8924e53 100644
--- a/src/USER-INTEL/pppm_intel.h
+++ b/src/USER-INTEL/pppm_intel.h
@@ -38,8 +38,6 @@ class PPPMIntel : public PPPM {
   virtual ~PPPMIntel();
   virtual void init();
   virtual void compute(int, int);
-  virtual void pack_forward(int, FFT_SCALAR *, int, int *);
-  virtual void unpack_forward(int, FFT_SCALAR *, int, int *);
   virtual double memory_usage();
   void compute_first(int, int);
   void compute_second(int, int);
@@ -64,12 +62,6 @@ class PPPMIntel : public PPPM {
   FFT_SCALAR **drho_lookup;
   FFT_SCALAR half_rho_scale, half_rho_scale_plus;
 
-  int _use_packing;
-  FFT_SCALAR ***vdxy_brick;
-  FFT_SCALAR ***vdz0_brick;
-  FFT_SCALAR *work3;
-  class GridComm *cg_pack;
-
   #ifdef _LMP_INTEL_OFFLOAD
   int _use_base;
   #endif
@@ -92,23 +84,14 @@ class PPPMIntel : public PPPM {
       make_rho<flt_t,acc_t,0>(buffers);
     }
   }
-  void poisson_ik_intel();
-  template<class flt_t, class acc_t, int use_table, int use_packing>
+  template<class flt_t, class acc_t, int use_table>
   void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers);
   template<class flt_t, class acc_t>
   void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) {
     if (_use_table == 1) {
-      if (_use_packing == 1) {
-        fieldforce_ik<flt_t, acc_t, 1, 1>(buffers);
-      } else {
-        fieldforce_ik<flt_t, acc_t, 1, 0>(buffers);
-      }
+      fieldforce_ik<flt_t, acc_t, 1>(buffers);
     } else {
-      if (_use_packing == 1) {
-        fieldforce_ik<flt_t, acc_t, 0, 1>(buffers);
-      } else {
-        fieldforce_ik<flt_t, acc_t, 0, 0>(buffers);
-      }
+      fieldforce_ik<flt_t, acc_t, 0>(buffers);
     }
   }
   template<class flt_t, class acc_t, int use_table>
diff --git a/src/USER-INTEL/verlet_lrt_intel.cpp b/src/USER-INTEL/verlet_lrt_intel.cpp
index 81f458614372cca3a6d1e74a5544db263ac60b67..9ff5f8517608fa74c7c9916824aa723247ef3fa6 100644
--- a/src/USER-INTEL/verlet_lrt_intel.cpp
+++ b/src/USER-INTEL/verlet_lrt_intel.cpp
@@ -68,7 +68,7 @@ void VerletLRTIntel::init()
 
   _intel_kspace = (PPPMIntel*)(force->kspace_match("pppm/intel", 0));
 
-  #ifdef LMP_INTEL_NOLRT
+  #ifndef LMP_INTEL_USELRT
   error->all(FLERR,
              "LRT otion for Intel package disabled at compile time");
   #endif
diff --git a/src/USER-INTEL/verlet_lrt_intel.h b/src/USER-INTEL/verlet_lrt_intel.h
index 813cd536050d44f54c2bc3379756970eb69c8320..0d7154ff643ee7a71af39057b13668345939ce1d 100644
--- a/src/USER-INTEL/verlet_lrt_intel.h
+++ b/src/USER-INTEL/verlet_lrt_intel.h
@@ -23,10 +23,7 @@ IntegrateStyle(verlet/lrt/intel,VerletLRTIntel)
 #include "verlet.h"
 #include "pppm_intel.h"
 
-#ifndef LMP_INTEL_USELRT
-#define LMP_INTEL_NOLRT
-#else
-
+#ifdef LMP_INTEL_USELRT
 #ifdef LMP_INTEL_LRT11
 #define _LMP_INTEL_LRT_11
 #include <thread>
diff --git a/src/USER-MANIFOLD/manifold_gaussian_bump.cpp b/src/USER-MANIFOLD/manifold_gaussian_bump.cpp
index db8c589afb092feb0e3d1b853e034618317e7fb8..a9ee35bbfc5f23628c2943e70ea8043777028af1 100644
--- a/src/USER-MANIFOLD/manifold_gaussian_bump.cpp
+++ b/src/USER-MANIFOLD/manifold_gaussian_bump.cpp
@@ -134,7 +134,7 @@ public:
 // Manifold itself:
 manifold_gaussian_bump::manifold_gaussian_bump(class LAMMPS* lmp,
                                                int narg, char **arg)
-	: manifold(lmp), lut_z(NULL), lut_zp(NULL) {}
+        : manifold(lmp), lut_z(NULL), lut_zp(NULL) {}
 
 
 manifold_gaussian_bump::~manifold_gaussian_bump()
@@ -361,13 +361,13 @@ void manifold_gaussian_bump::test_lut()
     n( x, nn );
     double taper_z;
     if( xx <= rc1 ){
-	    taper_z = gaussian_bump(xx);
+            taper_z = gaussian_bump(xx);
     }else if( xx < rc2 ){
-	    taper_z = lut_get_z( xx );
+            taper_z = lut_get_z( xx );
     }else{
-	    taper_z = 0.0;
+            taper_z = 0.0;
     }
-    fprintf( fp, "%g %g %g %g %g\n", xx, gaussian_bump(xx), taper_z,
+    fprintf( fp, "%g %g %g %g %g %g %g\n", xx, gaussian_bump(xx), taper_z,
              gg, nn[0], nn[1], nn[2] );
   }
   fclose(fp);
diff --git a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp
index fd50e774dafb08e7a3be7689ed1081e2fd3a7cef..136c52ab361a2023ee2442fc038039a8d2c1a7d0 100644
--- a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp
+++ b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp
@@ -24,5 +24,5 @@ void manifold_plane_wiggle::n( const double *x, double *n )
   double w = params[1];
   n[2] = 1;
   n[1] = 0.0;
-  n[0] = -a*w*cos(x[0]);
+  n[0] = -a*w*cos(w*x[0]);
 }
diff --git a/src/USER-MISC/README b/src/USER-MISC/README
index 65146abd5436cd2b0e7ddc2afc66ebfdfa36d463..5af5b22eb7889609834a4449bb5a118e75789c86 100644
--- a/src/USER-MISC/README
+++ b/src/USER-MISC/README
@@ -47,6 +47,7 @@ fix imd, Axel Kohlmeyer, akohlmey at gmail.com, 9 Nov 2009
 fix ipi, Michele Ceriotti (EPFL Lausanne), michele.ceriotti at gmail.com, 24 Nov 2014
 fix nvk, Efrem Braun (UC Berkeley), efrem.braun at gmail.com, https://github.com/lammps/lammps/pull/310
 fix pimd, Yuxing Peng (U Chicago), yuxing at uchicago.edu, 24 Nov 2014
+fix rhok, Ulf Pedersen (Roskilde U), ulf at urp.dk, 25 Sep 2017
 fix smd, Axel Kohlmeyer, akohlmey at gmail.com, 19 May 2008
 fix ti/spring, Rodrigo Freitas (Unicamp/Brazil), rodrigohb at gmail.com, 7 Nov 2013
 fix ttm/mod, Sergey Starikov and Vasily Pisarev (JIHT), pisarevvv at gmail.com, 2 Feb 2015
diff --git a/src/USER-MISC/fix_rhok.cpp b/src/USER-MISC/fix_rhok.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..58b0e95a97e0abe10ac7d48e1e93807a0c5a16ab
--- /dev/null
+++ b/src/USER-MISC/fix_rhok.cpp
@@ -0,0 +1,245 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Ulf R. Pedersen, ulf@urp.dk
+------------------------------------------------------------------------- */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "fix_rhok.h"
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "respa.h"
+#include "update.h"
+#include "citeme.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+static const char cite_fix_rhok[] =   // BibTeX entry passed to citeme->add() in the constructor
+  "Bias on the collective density field (fix rhok):\n\n"
+  "@Article{pedersen_jcp139_104102_2013,\n"
+  "title = {Direct calculation of the solid-liquid Gibbs free energy difference in a single equilibrium simulation},\n"
+  "volume = {139},\n"
+  "number = {10},\n"
+  "url = {http://aip.scitation.org/doi/10.1063/1.4818747},\n"
+  "doi = {10.1063/1.4818747},\n"
+  "urldate = {2017-10-03},\n"
+  "journal = {J. Chem. Phys.},\n"
+  "author = {Pedersen, Ulf R.},\n"
+  "year = {2013},\n"
+  "pages = {104102}\n"
+  "}\n\n";
+
+FixRhok::FixRhok( LAMMPS* inLMP, int inArgc, char** inArgv )
+  : Fix( inLMP, inArgc, inArgv )
+{
+  if (lmp->citeme) lmp->citeme->add(cite_fix_rhok);
+
+  // Arguments 3-5 are the integer wave-vector indices, 6 is kappa, 7 is rho_k^0
+  if( inArgc != 8 )
+    error->all(FLERR,"Illegal fix rhok command" );
+
+  // Set up fix flags
+  scalar_flag = 1;         // have compute_scalar
+  vector_flag = 1;         // have compute_vector...
+  size_vector = 3;         // ...with this many components
+  global_freq = 1;         // whose value can be computed at every timestep
+  thermo_energy = 1;       // this fix changes system's potential energy
+  extscalar = 0;           // but the deltaPE might not scale with # of atoms
+  extvector = 0;           // neither do the components of the vector
+
+  // Defined start values: nothing is read uninitialized before init()/post_force()
+  mNLevelsRESPA = 0;  mNThis = 0;  mSqrtNThis = 0.0;
+  mRhoKLocal[0] = mRhoKLocal[1] = mRhoKGlobal[0] = mRhoKGlobal[1] = 0.0;
+
+  // Parse fix options: k = 2*pi*n/L, with L the box length at construction time
+  int n[3];
+  n[0]   = force->inumeric(FLERR,inArgv[3]);
+  n[1]   = force->inumeric(FLERR,inArgv[4]);
+  n[2]   = force->inumeric(FLERR,inArgv[5]);
+  mK[0] = n[0]*(2*M_PI / (domain->boxhi[0] - domain->boxlo[0]));
+  mK[1] = n[1]*(2*M_PI / (domain->boxhi[1] - domain->boxlo[1]));
+  mK[2] = n[2]*(2*M_PI / (domain->boxhi[2] - domain->boxlo[2]));
+  mKappa = force->numeric(FLERR,inArgv[6]);
+  mRhoK0 = force->numeric(FLERR,inArgv[7]);
+}
+
+// Methods that this fix implements
+// --------------------------------
+
+// Tells LAMMPS where this fix should act: returns the bitwise OR of the
+// flags for every hook on which this fix wants to be called
+int
+FixRhok::setmask()
+{
+  // Hooks that modify forces: plain MD, RESPA and minimization
+  const int forceHooks = POST_FORCE
+                       | POST_FORCE_RESPA
+                       | MIN_POST_FORCE;
+
+  // Flag for the potential-energy contribution of this fix
+  const int energyHooks = THERMO_ENERGY;
+
+  // Combine both groups into the final mask
+  return forceHooks | energyHooks;
+}
+
+// Initializes the fix at the beginning of a run
+void
+FixRhok::init()
+{
+  // RESPA boilerplate: match every respa variant (e.g. "respa/omp"),
+  // not only the literal style name "respa"
+  if( strstr( update->integrate_style, "respa" ) )
+    mNLevelsRESPA = ((Respa *) update->integrate)->nlevels;
+
+  // Count the number of affected particles owned by this processor
+  int nThisLocal = 0;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  for( int i = 0; i < nlocal; i++ )
+    if( mask[i] & groupbit ) nThisLocal++;
+
+  // Sum over all processors; sqrt(N) is the normalization applied to rho_k
+  MPI_Allreduce( &nThisLocal, &mNThis,
+                 1, MPI_INT, MPI_SUM, world );
+  mSqrtNThis = sqrt( mNThis );
+}
+
+// Initial application of the fix to a system (when doing MD)
+void
+FixRhok::setup( int inVFlag )
+{
+  // Any verlet variant (e.g. "verlet/split") must not be cast to Respa below
+  if( strstr( update->integrate_style, "verlet" ) )
+    post_force( inVFlag );
+  else {
+    ((Respa *) update->integrate)->copy_flevel_f( mNLevelsRESPA - 1 );
+    post_force_respa( inVFlag, mNLevelsRESPA - 1,0 );
+    ((Respa *) update->integrate)->copy_f_flevel( mNLevelsRESPA - 1 );
+  }
+}
+
+// Minimizer counterpart of setup(): the bias is applied through the
+// same routine that handles the ordinary MD force loop
+void FixRhok::min_setup( int inVFlag )
+{
+  post_force( inVFlag );
+}
+
+// Apply the umbrella bias on |rho_k| to the forces (ordinary MD force loop)
+void
+FixRhok::post_force( int inVFlag )
+{
+  double **x = atom->x;
+  double **f = atom->f;
+  const int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  // Pass 1: partial sums of rho_k = sum_i exp( - i k.r_i ) over the
+  // locally-owned atoms that belong to the fix group
+  double sumRe = 0.0;
+  double sumIm = 0.0;
+
+  for( int i = 0; i < nlocal; i++ ) {
+    if( !(mask[i] & groupbit) ) continue;   // atom not affected by this fix
+
+    const double phase = mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2];
+    sumRe += cos( phase );
+    sumIm -= sin( phase );
+  }
+
+  mRhoKLocal[0] = sumRe;
+  mRhoKLocal[1] = sumIm;
+
+  // Combine the partial sums of all processors into mRhoKGlobal
+  MPI_Allreduce( mRhoKLocal, mRhoKGlobal,
+                 2, MPI_DOUBLE, MPI_SUM, world );
+
+  // Info:  < \sum_{i,j} e^{-ik.(r_i - r_j)} > ~ N, so
+  // we define rho_k as (1 / sqrt(N)) \sum_i e^{-i k.r_i}, so that
+  // <rho_k^2> is intensive.
+  for( int c = 0; c < 2; c++ )
+    mRhoKGlobal[c] /= mSqrtNThis;
+
+  // Magnitude of rho_k and the part of the force prefactor that is the
+  // same for every atom
+  const double rhoK = sqrt( mRhoKGlobal[0]*mRhoKGlobal[0]
+                            + mRhoKGlobal[1]*mRhoKGlobal[1] );
+  const double scale = mKappa * ( rhoK - mRhoK0 ) / rhoK;
+
+  // Pass 2: add the bias force to every affected atom
+  //
+  // U = kappa/2 ( |rho_k| - rho_k^0 )^2
+  // f_i = -grad_i U = -kappa ( |rho_k| - rho_k^0 ) grad_i |rho_k|
+  // grad_i |rho_k| = Re( rho_k* (-i k e^{-i k . r_i} / sqrt(N)) ) / |rho_k|
+  //
+  // In terms of real and imag parts of rho_k,
+  //
+  // Re( rho_k* (-i k e^{-i k . r_i}) ) =
+  //   (- Re[rho_k] * sin( k . r_i ) - Im[rho_k] * cos( k . r_i )) * k
+  for( int i = 0; i < nlocal; i++ ) {
+    if( !(mask[i] & groupbit) ) continue;   // atom not affected by this fix
+
+    const double phase = mK[0]*x[i][0] + mK[1]*x[i][1] + mK[2]*x[i][2];
+    const double prefactor = scale
+      * (-mRhoKGlobal[0]*sin( phase ) - mRhoKGlobal[1]*cos( phase )) / mSqrtNThis;
+    for( int d = 0; d < 3; d++ )
+      f[i][d] -= prefactor * mK[d];
+  }
+}
+
+// RESPA force hook: the bias is applied on the last level (nlevels - 1) only
+void FixRhok::post_force_respa( int inVFlag, int inILevel, int inILoop )
+{
+  const bool lastLevel = ( inILevel == mNLevelsRESPA - 1 );
+  if( !lastLevel ) return;
+  post_force( inVFlag );
+}
+
+// Minimizer force hook: delegates to the routine that is used for the
+// ordinary MD force loop
+void FixRhok::min_post_force( int inVFlag )
+{
+  post_force( inVFlag );
+}
+
+// Bias energy U = kappa/2 ( |rho_k| - rho_k^0 )^2 from the stored rho_k
+double FixRhok::compute_scalar()
+{
+  const double re = mRhoKGlobal[0];
+  const double im = mRhoKGlobal[1];
+  const double deviation = sqrt( re*re + im*im ) - mRhoK0;
+
+  return 0.5 * mKappa * deviation * deviation;
+}
+
+// Compute the ith component of the vector: the real part, the imaginary
+// part and the magnitude of the stored rho_k for inI = 0, 1, 2
+double FixRhok::compute_vector( int inI )
+{
+  switch( inI ) {
+  case 0:  return mRhoKGlobal[0];   // Real part
+  case 1:  return mRhoKGlobal[1];   // Imaginary part
+  case 2:  // Magnitude |rho_k|
+    return sqrt( mRhoKGlobal[0]*mRhoKGlobal[0]
+                 + mRhoKGlobal[1]*mRhoKGlobal[1] );
+  default: // Any other index yields this placeholder value
+    return 12345.0;
+  }
+}
diff --git a/src/USER-MISC/fix_rhok.h b/src/USER-MISC/fix_rhok.h
new file mode 100644
index 0000000000000000000000000000000000000000..c950c08b1d14aa3b22f14ce98e9b5c65eb513750
--- /dev/null
+++ b/src/USER-MISC/fix_rhok.h
@@ -0,0 +1,77 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(rhok,FixRhok)
+
+#else
+
+#ifndef LMP_FIX_RHOK_H
+#define LMP_FIX_RHOK_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+class FixRhok : public Fix
+{
+public:
+  // Fix that applies a harmonic bias on |rho_k| (collective density field).
+  // Constructor: receives the fix arguments given in the LAMMPS input
+  FixRhok( LAMMPS* inLMP, int inArgc, char** inArgv );
+  virtual ~FixRhok() {};
+
+  // Methods that this fix implements
+  // --------------------------------
+
+  // Tells LAMMPS where this fix should act (returns the hook bit mask)
+  int setmask();
+
+  // Initializes the fix at the beginning of a run (counts the group atoms)
+  void init();
+
+  // Initial application of the fix to a system (when doing MD / minimization)
+  void setup( int inVFlag );
+  void min_setup( int inVFlag );
+
+  // Modify the forces calculated in the main force loop, either when
+  // doing usual MD, RESPA MD or minimization
+  void post_force( int inVFlag );
+  void post_force_respa( int inVFlag, int inILevel, int inILoop );
+  void min_post_force( int inVFlag );
+
+  // Bias energy kappa/2 ( |rho_k| - rho_k^0 )^2 for the stored rho_k
+  double compute_scalar();
+
+  // Component inI of the fix vector: 0 = Re(rho_k), 1 = Im(rho_k), 2 = |rho_k|
+  double compute_vector( int inI );
+
+private:
+  // RESPA boilerplate: number of RESPA levels, set in init()
+  int mNLevelsRESPA;
+
+  // Umbrella parameters: wave vector k, spring constant kappa, target rho_k^0
+        double mK[3], mKappa, mRhoK0;
+
+  // Number of particles affected by the fix (all processors) and its sqrt
+        int mNThis;
+        double mSqrtNThis;
+
+  // Real and imaginary parts of rho_k := sum_i exp( - i k . r_i )
+        double mRhoKLocal[2], mRhoKGlobal[2];
+};
+
+}  // namespace LAMMPS_NS
+
+#endif // LMP_FIX_RHOK_H
+#endif // FIX_CLASS
+
diff --git a/src/USER-MISC/fix_srp.cpp b/src/USER-MISC/fix_srp.cpp
index f3dec42a8320de89202129fac1741118530f53dc..e1e5f579b875c31f16c70c3f55ba3d44a3f14494 100644
--- a/src/USER-MISC/fix_srp.cpp
+++ b/src/USER-MISC/fix_srp.cpp
@@ -98,7 +98,7 @@ int FixSRP::setmask()
 
 void FixSRP::init()
 {
-  if (force->pair_match("hybrid",1) == NULL)
+  if (force->pair_match("hybrid",1) == NULL && force->pair_match("hybrid/overlay",1) == NULL)
     error->all(FLERR,"Cannot use pair srp without pair_style hybrid");
 
   int has_rigid = 0;
diff --git a/src/USER-NETCDF/dump_netcdf.cpp b/src/USER-NETCDF/dump_netcdf.cpp
index 971f69f7ccdf8000d4b39e78107fa4c058536387..af9f94a728f8b61f43e08480f7b80dc33353d54d 100644
--- a/src/USER-NETCDF/dump_netcdf.cpp
+++ b/src/USER-NETCDF/dump_netcdf.cpp
@@ -88,8 +88,8 @@ DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) :
 
   if (multiproc)
     error->all(FLERR,"Multi-processor writes are not supported.");
-  if (multifile)
-    error->all(FLERR,"Multiple files are not supported.");
+  if (append_flag && multifile)
+    error->all(FLERR,"Cannot append when writing to multiple files.");
 
   perat = new nc_perat_t[nfield];
 
@@ -224,6 +224,24 @@ DumpNetCDF::~DumpNetCDF()
 
 void DumpNetCDF::openfile()
 {
+  char *filecurrent = filename;
+  if (multifile && !singlefile_opened) {
+    char *filestar = filecurrent;
+    filecurrent = new char[strlen(filestar) + 16];
+    char *ptr = strchr(filestar,'*');
+    *ptr = '\0';
+    if (padflag == 0)
+      sprintf(filecurrent,"%s" BIGINT_FORMAT "%s",
+              filestar,update->ntimestep,ptr+1);
+    else {
+      char bif[8],pad[16];
+      strcpy(bif,BIGINT_FORMAT);
+      sprintf(pad,"%%s%%0%d%s%%s",padflag,&bif[1]);
+      sprintf(filecurrent,pad,filestar,update->ntimestep,ptr+1);
+    }
+    *ptr = '*';
+  }
+
   if (thermo && !singlefile_opened) {
     if (thermovar)  delete [] thermovar;
     thermovar = new int[output->thermo->nfield];
@@ -268,14 +286,14 @@ void DumpNetCDF::openfile()
   ntotalgr = group->count(igroup);
 
   if (filewriter) {
-    if (append_flag && access(filename, F_OK) != -1) {
+    if (append_flag && !multifile && access(filecurrent, F_OK) != -1) {
       // Fixme! Perform checks if dimensions and variables conform with
       // data structure standard.
 
       if (singlefile_opened) return;
       singlefile_opened = 1;
 
-      NCERRX( nc_open(filename, NC_WRITE, &ncid), filename );
+      NCERRX( nc_open(filecurrent, NC_WRITE, &ncid), filecurrent );
 
       // dimensions
       NCERRX( nc_inq_dimid(ncid, NC_FRAME_STR, &frame_dim), NC_FRAME_STR );
@@ -312,8 +330,7 @@ void DumpNetCDF::openfile()
         // Type mangling
         if (vtype[perat[i].field[0]] == INT) {
           xtype = NC_INT;
-        }
-        else {
+        } else {
           if (double_precision)
             xtype = NC_DOUBLE;
           else
@@ -337,10 +354,13 @@ void DumpNetCDF::openfile()
       NCERR( nc_inq_dimlen(ncid, frame_dim, &nframes) );
       // framei == -1 means append to file, == -2 means override last frame
       // Note that in the input file this translates to 'yes', '-1', etc.
-      if (framei < 0 || (append_flag && framei == 0))  framei = nframes+framei+1;
+
+      if (framei <= 0) framei = nframes+framei+1;
       if (framei < 1)  framei = 1;
-    }
-    else {
+    } else {
+      if (framei != 0)
+        error->all(FLERR,"at keyword requires use of 'append yes'");
+
       int dims[NC_MAX_VAR_DIMS];
       size_t index[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS];
       double d[1];
@@ -348,8 +368,8 @@ void DumpNetCDF::openfile()
       if (singlefile_opened) return;
       singlefile_opened = 1;
 
-      NCERRX( nc_create(filename, NC_64BIT_DATA, &ncid),
-          filename );
+      NCERRX( nc_create(filecurrent, NC_64BIT_DATA, &ncid),
+              filecurrent );
 
       // dimensions
       NCERRX( nc_def_dim(ncid, NC_FRAME_STR, NC_UNLIMITED, &frame_dim),
@@ -598,15 +618,39 @@ void DumpNetCDF::closefile()
   if (filewriter && singlefile_opened) {
     NCERR( nc_close(ncid) );
     singlefile_opened = 0;
-    // append next time DumpNetCDF::openfile is called
-    append_flag = 1;
     // write to next frame upon next open
-    framei++;
+    if (multifile)
+      framei = 1;
+    else {
+      // append next time DumpNetCDF::openfile is called
+      append_flag = 1;
+      framei++;
+    }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
+template <typename T>   // generic case: forwards to the int writer
+int nc_put_var1_bigint(int ncid, int varid, const size_t index[], const T* tp)
+{
+  return nc_put_var1_int(ncid, varid, index, tp);
+}
+
+template <>             // T = long: forwards to the long writer
+int nc_put_var1_bigint<long>(int ncid, int varid, const size_t index[],
+                        const long* tp)
+{
+  return nc_put_var1_long(ncid, varid, index, tp);
+}
+
+template <>             // T = long long: forwards to the longlong writer
+int nc_put_var1_bigint<long long>(int ncid, int varid, const size_t index[],
+                             const long long* tp)
+{
+  return nc_put_var1_longlong(ncid, varid, index, tp);
+}
+
 void DumpNetCDF::write()
 {
   // open file
@@ -638,13 +682,8 @@ void DumpNetCDF::write()
                   th->keyword[i] );
         }
         else if (th->vtype[i] == BIGINT) {
-#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG)
-          NCERRX( nc_put_var1_long(ncid, thermovar[i], start, &th->bivalue),
-                  th->keyword[i] );
-#else
-          NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &th->bivalue),
+          NCERRX( nc_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue),
                   th->keyword[i] );
-#endif
         }
       }
     }
@@ -888,8 +927,11 @@ int DumpNetCDF::modify_param(int narg, char **arg)
   }
   else if (strcmp(arg[iarg],"at") == 0) {
     iarg++;
+    if (iarg >= narg)
+      error->all(FLERR,"expected additional arg after 'at' keyword.");
     framei = force->inumeric(FLERR,arg[iarg]);
-    if (framei < 0)  framei--;
+    if (framei == 0) error->all(FLERR,"frame 0 not allowed for 'at' keyword.");
+    else if (framei < 0) framei--;
     iarg++;
     return 2;
   }
@@ -911,68 +953,6 @@ int DumpNetCDF::modify_param(int narg, char **arg)
 
 /* ---------------------------------------------------------------------- */
 
-void DumpNetCDF::write_prmtop()
-{
-  char fn[1024];
-  char tmp[81];
-  FILE *f;
-
-  strcpy(fn, filename);
-  strcat(fn, ".prmtop");
-
-  f = fopen(fn, "w");
-  fprintf(f, "%%VERSION  LAMMPS\n");
-  fprintf(f, "%%FLAG TITLE\n");
-  fprintf(f, "%%FORMAT(20a4)\n");
-  memset(tmp, ' ', 76);
-  tmp[76] = '\0';
-  fprintf(f, "NASN%s\n", tmp);
-
-  fprintf(f, "%%FLAG POINTERS\n");
-  fprintf(f, "%%FORMAT(10I8)\n");
-#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG)
-  fprintf(f, "%8li", ntotalgr);
-#else
-  fprintf(f, "%8i", ntotalgr);
-#endif
-  for (int i = 0; i < 11; i++)
-    fprintf(f, "%8i", 0);
-  fprintf(f, "\n");
-  for (int i = 0; i < 12; i++)
-    fprintf(f, "%8i", 0);
-  fprintf(f, "\n");
-  for (int i = 0; i < 6; i++)
-    fprintf(f, "%8i", 0);
-  fprintf(f, "\n");
-
-  fprintf(f, "%%FLAG ATOM_NAME\n");
-  fprintf(f, "%%FORMAT(20a4)\n");
-  for (int i = 0; i < ntotalgr; i++) {
-    fprintf(f, "%4s", "He");
-    if ((i+1) % 20 == 0)
-      fprintf(f, "\n");
-  }
-
-  fprintf(f, "%%FLAG CHARGE\n");
-  fprintf(f, "%%FORMAT(5E16.5)\n");
-  for (int i = 0; i < ntotalgr; i++) {
-    fprintf(f, "%16.5e", 0.0);
-    if ((i+1) % 5 == 0)
-      fprintf(f, "\n");
-  }
-
-  fprintf(f, "%%FLAG MASS\n");
-  fprintf(f, "%%FORMAT(5E16.5)\n");
-  for (int i = 0; i < ntotalgr; i++) {
-    fprintf(f, "%16.5e", 1.0);
-    if ((i+1) % 5 == 0)
-        fprintf(f, "\n");
-  }
-  fclose(f);
-}
-
-/* ---------------------------------------------------------------------- */
-
 void DumpNetCDF::ncerr(int err, const char *descr, int line)
 {
   if (err != NC_NOERR) {
diff --git a/src/USER-NETCDF/dump_netcdf.h b/src/USER-NETCDF/dump_netcdf.h
index b86f294d3084d41ff7116ef995b7852a55f31bc5..25d64efade446861f152cc27f60728e7a15eb781 100644
--- a/src/USER-NETCDF/dump_netcdf.h
+++ b/src/USER-NETCDF/dump_netcdf.h
@@ -92,7 +92,6 @@ class DumpNetCDF : public DumpCustom {
   void closefile();
   virtual void write_header(bigint);
   virtual void write_data(int, double *);
-  void write_prmtop();
 
   virtual int modify_param(int, char **);
 
diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.cpp b/src/USER-NETCDF/dump_netcdf_mpiio.cpp
index 3b753b1b0453963fc7002e7f6bbfbd690b1178aa..890029371e53fea15213fbc5178c50602e0eaad5 100644
--- a/src/USER-NETCDF/dump_netcdf_mpiio.cpp
+++ b/src/USER-NETCDF/dump_netcdf_mpiio.cpp
@@ -88,8 +88,8 @@ DumpNetCDFMPIIO::DumpNetCDFMPIIO(LAMMPS *lmp, int narg, char **arg) :
 
   if (multiproc)
     error->all(FLERR,"Multi-processor writes are not supported.");
-  if (multifile)
-    error->all(FLERR,"Multiple files are not supported.");
+  if (append_flag && multifile)
+    error->all(FLERR,"Cannot append when writing to multiple files.");
 
   perat = new nc_perat_t[nfield];
 
@@ -217,6 +217,24 @@ DumpNetCDFMPIIO::~DumpNetCDFMPIIO()
 
 void DumpNetCDFMPIIO::openfile()
 {
+  char *filecurrent = filename;
+  if (multifile && !singlefile_opened) {
+    char *filestar = filecurrent;
+    filecurrent = new char[strlen(filestar) + 16];
+    char *ptr = strchr(filestar,'*');
+    *ptr = '\0';
+    if (padflag == 0)
+      sprintf(filecurrent,"%s" BIGINT_FORMAT "%s",
+              filestar,update->ntimestep,ptr+1);
+    else {
+      char bif[8],pad[16];
+      strcpy(bif,BIGINT_FORMAT);
+      sprintf(pad,"%%s%%0%d%s%%s",padflag,&bif[1]);
+      sprintf(filecurrent,pad,filestar,update->ntimestep,ptr+1);
+    }
+    *ptr = '*';
+  }
+
   if (thermo && !singlefile_opened) {
     if (thermovar)  delete [] thermovar;
     thermovar = new int[output->thermo->nfield];
@@ -260,7 +278,7 @@ void DumpNetCDFMPIIO::openfile()
   // get total number of atoms
   ntotalgr = group->count(igroup);
 
-  if (append_flag && access(filename, F_OK) != -1) {
+  if (append_flag && !multifile && access(filecurrent, F_OK) != -1) {
     // Fixme! Perform checks if dimensions and variables conform with
     // data structure standard.
 
@@ -270,8 +288,8 @@ void DumpNetCDFMPIIO::openfile()
     if (singlefile_opened) return;
     singlefile_opened = 1;
 
-    NCERRX( ncmpi_open(MPI_COMM_WORLD, filename, NC_WRITE, MPI_INFO_NULL,
-                       &ncid), filename );
+    NCERRX( ncmpi_open(MPI_COMM_WORLD, filecurrent, NC_WRITE, MPI_INFO_NULL,
+                       &ncid), filecurrent );
 
     // dimensions
     NCERRX( ncmpi_inq_dimid(ncid, NC_FRAME_STR, &frame_dim), NC_FRAME_STR );
@@ -333,10 +351,12 @@ void DumpNetCDFMPIIO::openfile()
     NCERR( ncmpi_inq_dimlen(ncid, frame_dim, &nframes) );
     // framei == -1 means append to file, == -2 means override last frame
     // Note that in the input file this translates to 'yes', '-1', etc.
-    if (framei < 0 || (append_flag && framei == 0))  framei = nframes+framei+1;
+    if (framei <= 0) framei = nframes+framei+1;
     if (framei < 1)  framei = 1;
-  }
-  else {
+  } else {
+    if (framei != 0)
+      error->all(FLERR,"at keyword requires use of 'append yes'");
+
     int dims[NC_MAX_VAR_DIMS];
     MPI_Offset index[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS];
     double d[1];
@@ -344,8 +364,8 @@ void DumpNetCDFMPIIO::openfile()
     if (singlefile_opened) return;
     singlefile_opened = 1;
 
-    NCERRX( ncmpi_create(MPI_COMM_WORLD, filename, NC_64BIT_DATA,
-                         MPI_INFO_NULL, &ncid), filename );
+    NCERRX( ncmpi_create(MPI_COMM_WORLD, filecurrent, NC_64BIT_DATA,
+                         MPI_INFO_NULL, &ncid), filecurrent );
 
     // dimensions
     NCERRX( ncmpi_def_dim(ncid, NC_FRAME_STR, NC_UNLIMITED, &frame_dim),
@@ -574,15 +594,40 @@ void DumpNetCDFMPIIO::closefile()
   if (singlefile_opened) {
     NCERR( ncmpi_close(ncid) );
     singlefile_opened = 0;
-    // append next time DumpNetCDFMPIIO::openfile is called
-    append_flag = 1;
     // write to next frame upon next open
-    framei++;
+    if (multifile)
+      framei = 1;
+    else {
+      // append next time DumpNetCDFMPIIO::openfile is called
+      append_flag = 1;
+      framei++;
+    }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
+template <typename T>  // fallback for any T without a specialization below
+int ncmpi_put_var1_bigint(int ncid, int varid, const MPI_Offset index[],
+                     const T* tp)
+{
+  return ncmpi_put_var1_int(ncid, varid, index, tp);  // tp is passed on as-is
+}
+
+template <>  // T = long: forward to the "long" put_var1 call
+int ncmpi_put_var1_bigint<long>(int ncid, int varid, const MPI_Offset index[],
+                           const long* tp)
+{
+  return ncmpi_put_var1_long(ncid, varid, index, tp);
+}
+
+template <>  // T = long long: forward to the "longlong" put_var1 call
+int ncmpi_put_var1_bigint<long long>(int ncid, int varid, const MPI_Offset index[],
+                                const long long* tp)
+{
+  return ncmpi_put_var1_longlong(ncid, varid, index, tp);
+}
+
 void DumpNetCDFMPIIO::write()
 {
   // open file
@@ -616,13 +661,8 @@ void DumpNetCDFMPIIO::write()
                   th->keyword[i] );
         }
         else if (th->vtype[i] == BIGINT) {
-#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG)
-          NCERRX( ncmpi_put_var1_long(ncid, thermovar[i], start, &th->bivalue),
-                  th->keyword[i] );
-#else
-          NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &th->bivalue),
+          NCERRX( ncmpi_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue),
                   th->keyword[i] );
-#endif
         }
       }
     }
@@ -883,8 +923,11 @@ int DumpNetCDFMPIIO::modify_param(int narg, char **arg)
   }
   else if (strcmp(arg[iarg],"at") == 0) {
     iarg++;
+    if (iarg >= narg)
+      error->all(FLERR,"expected additional arg after 'at' keyword.");
     framei = force->inumeric(FLERR,arg[iarg]);
-    if (framei < 0)  framei--;
+    if (framei == 0) error->all(FLERR,"frame 0 not allowed for 'at' keyword.");
+    else if (framei < 0) framei--;
     iarg++;
     return 2;
   }
diff --git a/src/USER-OMP/bond_gromos_omp.cpp b/src/USER-OMP/bond_gromos_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7904c4683b4c5526a99b2e4fa56049cc43be2dbf
--- /dev/null
+++ b/src/USER-OMP/bond_gromos_omp.cpp
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "bond_gromos_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+BondGromosOMP::BondGromosOMP(class LAMMPS *lmp)
+  : BondGromos(lmp), ThrOMP(lmp,THR_BOND)  // construct both base classes
+{
+  suffix_flag |= Suffix::OMP;  // add the OMP bit to this style's suffix flags
+}
+
+/* ---------------------------------------------------------------------- */
+
+void BondGromosOMP::compute(int eflag, int vflag)
+{
+  // set up energy/virial tallying, then evaluate the bond list inside a threaded region
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;  // upper bound of the bond index range split below
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably sets tid and this thread's [ifrom,ito)
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (inum > 0) {  // pick the eval<EVFLAG,EFLAG,NEWTON_BOND> instance matching the runtime flags
+      if (evflag) {
+        if (eflag) {
+          if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+          else eval<1,1,0>(ifrom, ito, thr);
+        } else {
+          if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+          else eval<1,0,0>(ifrom, ito, thr);
+        }
+      } else {
+        if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+        else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    thr->timer(Timer::BOND);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondGromosOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  // evaluate bonds nfrom..nto-1 of the bond list; forces go to the per-thread array
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int3_t * _noalias const bondlist = (int3_t *) neighbor->bondlist[0];
+  const int nlocal = atom->nlocal;
+  ebond = 0.0;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n].a;
+    i2 = bondlist[n].b;
+    type = bondlist[n].t;
+
+    delx = x[i1].x - x[i2].x;
+    dely = x[i1].y - x[i2].y;
+    delz = x[i1].z - x[i2].z;
+
+    const double rsq = delx*delx + dely*dely + delz*delz;
+    const double dr = rsq - r0[type]*r0[type];
+    const double kdr = k[type]*dr;
+
+    // force & energy for E = k (rsq - r0^2)^2:
+    //   dE/dx = 4 k (rsq - r0^2) delx, hence fbond = -4 k dr and E = k dr^2
+    fbond = -4.0 * kdr;
+
+    if (EFLAG) ebond = kdr*dr;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1].x += delx*fbond;
+      f[i1].y += dely*fbond;
+      f[i1].z += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2].x -= delx*fbond;
+      f[i2].y -= dely*fbond;
+      f[i2].z -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+                             ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_gromos_omp.h b/src/USER-OMP/bond_gromos_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..69e92e42950a17855642f940c083c04e17378aab
--- /dev/null
+++ b/src/USER-OMP/bond_gromos_omp.h
@@ -0,0 +1,46 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(gromos/omp,BondGromosOMP)
+
+#else
+
+#ifndef LMP_BOND_GROMOS_OMP_H
+#define LMP_BOND_GROMOS_OMP_H
+
+#include "bond_gromos.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondGromosOMP : public BondGromos, public ThrOMP {
+  // bond style registered above as "gromos/omp"; combines BondGromos with ThrOMP
+ public:
+  BondGromosOMP(class LAMMPS *lmp);
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>  // compile-time variants of the kernel
+  void eval(int ifrom, int ito, ThrData * const thr);  // handles bonds ifrom..ito-1
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_neigh_history_omp.cpp b/src/USER-OMP/fix_neigh_history_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ecc3147ed5d0515dfb13d743531ed28599e11b40
--- /dev/null
+++ b/src/USER-OMP/fix_neigh_history_omp.cpp
@@ -0,0 +1,603 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include <string.h>
+#include <stdio.h>
+#include "fix_neigh_history_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "force.h"
+#include "pair.h"
+#include "update.h"
+#include "memory.h"
+#include "modify.h"
+#include "error.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history
+
+
+FixNeighHistoryOMP::FixNeighHistoryOMP(class LAMMPS *lmp,int narg,char **argv)
+  : FixNeighHistory(lmp,narg,argv) {
+  // nothing to do here: all arguments are forwarded to the FixNeighHistory constructor
+}
+
+
+/* ----------------------------------------------------------------------
+   copy partner info from neighbor data structs (NDS) to atom arrays
+   should be called whenever NDS store current history info
+     and need to transfer the info to owned atoms
+   e.g. when atoms migrate to new procs, new neigh list built, or between runs
+     when atoms may be added or deleted (NDS becomes out-of-date)
+   the next post_neighbor() will put this info back into new NDS
+   called during run before atom exchanges, including for restart files
+   called at end of run via post_run()
+   do not call during setup of run (setup_pre_exchange)
+     b/c there is no guarantee of a current NDS (even on continued run)
+   if run command does a 2nd run with pre = no, then no neigh list
+     will be built, but old neigh list will still have the info
+   onesided and newton on and newton off versions
+------------------------------------------------------------------------- */
+// below is the pre_exchange() function from the parent class
+// void FixNeighHistory::pre_exchange()
+// {
+//  if (onesided) pre_exchange_onesided();
+//  else if (newton_pair) pre_exchange_newton();
+//  else pre_exchange_no_newton();
+//}
+
+/* ----------------------------------------------------------------------
+   onesided version for sphere contact with line/tri particles
+   neighbor list has I = sphere, J = line/tri
+   only store history info with spheres
+------------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_onesided()
+{
+  // threaded version: each thread copies the history of its own chunk of atoms
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  maxpartner = 0;  // shared maximum; only raised in the critical section below
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,n,inum,jnum;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues,*onevalues;
+
+    // NOTE: all operations until very end are with:
+    //   nlocal_neigh <= current nlocal
+    // b/c previous neigh list was built with nlocal_neigh
+    // nlocal can be larger if other fixes added atoms at this pre_exchange()
+
+    // clear per-thread paged data structures
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // each thread only updates atoms in its own chunk [lfrom,lto) of [0,nlocal_neigh)
+    const int ldelta = 1 + nlocal_neigh/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
+
+    // 1st loop over neighbor list, I = sphere, J = tri
+    // only calculate npartner for each owned spheres
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    tagint *tag = atom->tag;
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+
+      for (jj = 0; jj < jnum; jj++)
+        if (allflags[jj])
+          if ((i >= lfrom) && (i < lto)) npartner[i]++;
+    }
+
+    // get page chunks to store atom IDs and shear history for my atoms
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if ((i >= lfrom) && (i < lto)) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL)
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+      }
+    }
+
+    // 2nd loop over neighbor list
+    // store partner IDs and values for the I atoms in my chunk
+    // re-zero npartner to use as counter
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+      allvalues = firstvalue[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          onevalues = &allvalues[dnum*jj];
+          j = jlist[jj];
+          j &= NEIGHMASK;
+
+          if ((i >= lfrom) && (i < lto)) {
+            m = npartner[i]++;
+            partner[i][m] = tag[j];
+            memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
+          }
+        }
+      }
+    }
+
+    // per-thread max # of partners in my chunk. do not reset the shared
+    // maxpartner here: other threads may already have merged their maximum
+    m = 0;
+    for (i = lfrom; i < lto; i++) m = MAX(m,npartner[i]);
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+    {
+      maxpartner = MAX(m,maxpartner);
+      comm->maxexchange_fix =MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+    }
+  }
+
+  // zero npartner values from previous nlocal_neigh to current nlocal
+  for (int i = nlocal_neigh; i < nlocal; ++i) npartner[i] = 0;
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_newton()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;
+  for (int i = 0; i < nall_neigh; i++) npartner[i] = 0;
+  // threaded version: each thread updates only the atoms in its own chunk
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,n,inum,jnum;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues,*onevalues,*jvalues;
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // 1st loop over neighbor list
+    // count partners of I and J atoms that fall into my chunk
+
+    tagint *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    // my chunk is [lfrom,lto), a contiguous slice of [0,nlocal_neigh)
+    const int ldelta = 1 + nlocal_neigh/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          if ((i >= lfrom) && (i < lto))
+            npartner[i]++;
+
+          j = jlist[jj];
+          j &= NEIGHMASK;
+          if ((j >= lfrom) && (j < lto))
+            npartner[j]++;
+        }
+      }
+    }
+#if defined(_OPENMP)
+#pragma omp barrier
+    {;}
+
+    // perform reverse comm to augment owned npartner counts with ghost counts
+
+#pragma omp master
+#endif
+    {
+      commflag = NPARTNER;
+      comm->reverse_comm_fix(this,0);
+    }
+    // NOTE(review): "omp master" has no implied barrier, so other threads can read npartner below while the comm runs - confirm
+    // get page chunks to store atom IDs and shear history for my atoms
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if ((i >= lfrom) && (i < lto)) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL)
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+      }
+    }
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    {
+      for (i = nlocal_neigh; i < nall_neigh; i++) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL) {
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+        }
+      }
+    }
+
+    // 2nd loop over neighbor list
+    // store partner IDs and values for I and J atoms in my chunk
+    // re-zero npartner to use as counter
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+      allvalues = firstvalue[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          onevalues = &allvalues[dnum*jj];
+          j = jlist[jj];
+          j &= NEIGHMASK;
+
+          if ((i >= lfrom) && (i < lto)) {
+            m = npartner[i]++;
+            partner[i][m] = tag[j];
+            memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
+          }
+
+          if ((j >= lfrom) && (j < lto)) {
+            m = npartner[j]++;
+            partner[j][m] = tag[i];
+            jvalues = &valuepartner[j][dnum*m];
+            for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n];
+          }
+        }
+      }
+    }
+#if defined(_OPENMP)
+#pragma omp barrier
+    {;}
+
+#pragma omp master
+#endif
+    {
+      // perform reverse comm to augment
+      // owned atom partner/valuepartner with ghost info
+      // use variable variant b/c size of packed data can be arbitrarily large
+      //   if many touching neighbors for large particle
+
+      commflag = PERPARTNER;
+      comm->reverse_comm_fix_variable(this);
+    }
+
+    // per-thread max # of partners in my chunk, merged into maxpartner below
+    m = 0;
+    for (i = lfrom; i < lto; i++) m = MAX(m,npartner[i]);
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+    {
+      maxpartner = MAX(m,maxpartner);
+      // each partner stores 1 ID + dnum values, so size by (dnum+1), not a fixed 4
+      comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+    }
+  }
+
+  // zero npartner values from previous nlocal_neigh to current nlocal
+
+  int nlocal = atom->nlocal;
+  for (int i = nlocal_neigh; i < nlocal; i++) npartner[i] = 0;
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_no_newton()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;  // shared maximum; only raised in the critical section below
+  // threaded version: each thread updates only the atoms in its own chunk
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,n,inum,jnum;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues,*onevalues,*jvalues;
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // 1st loop over neighbor list
+    // calculate npartner for each owned atom
+
+    tagint *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    // my chunk is [lfrom,lto), a contiguous slice of [0,nlocal_neigh)
+    const int ldelta = 1 + nlocal_neigh/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
+
+    // zero npartner for the atoms in my chunk
+    // (the per-thread pages were already reset above)
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    // every thread scans the full list but counts only atoms inside its chunk
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          if ((i >= lfrom) && (i < lto))
+            npartner[i]++;
+
+          j = jlist[jj];
+          j &= NEIGHMASK;
+          if ((j >= lfrom) && (j < lto))
+            npartner[j]++;
+        }
+      }
+    }
+
+    // get page chunks to store atom IDs and shear history for my atoms
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if ((i >= lfrom) && (i < lto)) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL)
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+      }
+    }
+
+    // 2nd loop over neighbor list
+    // store partner IDs and values for I and J atoms in my chunk
+    // re-zero npartner to use as counter
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+      allvalues = firstvalue[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          onevalues = &allvalues[dnum*jj];
+          j = jlist[jj];
+          j &= NEIGHMASK;
+
+          if ((i >= lfrom) && (i < lto)) {
+            m = npartner[i]++;
+            partner[i][m] = tag[j];
+            memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
+          }
+
+          if ((j >= lfrom) && (j < lto)) {
+            m = npartner[j]++;
+            partner[j][m] = tag[i];
+            jvalues = &valuepartner[j][dnum*m];
+            for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n];  // J side stores the negated values
+          }
+        }
+      }
+    }
+
+    // per-thread max # of partners in my chunk, merged into maxpartner below
+    m = 0;
+    for (i = lfrom; i < lto; i++)
+      m = MAX(m,npartner[i]);
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+    {
+      maxpartner = MAX(m,maxpartner);
+      comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+    }
+  }
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::post_neighbor()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+  nlocal_neigh = nlocal;  // remember the atom counts this neighbor list refers to
+  nall_neigh = nall;
+
+  // realloc firstflag and firstvalue if needed
+
+  if (maxatom < nlocal) {
+    memory->sfree(firstflag);
+    memory->sfree(firstvalue);
+    maxatom = nall;
+    firstflag = (int **)
+      memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag");
+    firstvalue = (double **)
+      memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue");
+  }
+
+  // rebuild the per-neighbor flags/values in parallel, one slice of ilist per thread
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,nn,np,inum,jnum,rflag;
+    tagint jtag;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues;
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // NOTE(review): these are the ipage_atom/dpage_atom pools that the
+    // pre_exchange_*() methods hand out as partner[]/valuepartner[], which are
+    // read in the loop below - confirm that resetting/reusing them here is safe
+    tagint *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    // each thread works on a fixed slice [lfrom,lto) of the ilist entries
+    const int ldelta = 1 + inum/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > inum) ? inum : lmax;
+
+    for (ii = lfrom; ii < lto; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      firstflag[i] = allflags = ipg.get(jnum);
+      firstvalue[i] = allvalues = dpg.get(jnum*dnum);
+      np = npartner[i];
+      nn = 0;  // offset of neighbor jj's dnum values inside allvalues
+
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        rflag = sbmask(j);
+        j &= NEIGHMASK;
+        jlist[jj] = j;  // store the index back with the extra bits masked off
+
+        // rflag = 1 if r < radsum in npair_size() method
+        // preserve neigh history info if tag[j] is in old-neigh partner list
+        // this test could be more geometrically precise for two sphere/line/tri
+
+        if (rflag) {
+          jtag = tag[j];
+          for (m = 0; m < np; m++)
+            if (partner[i][m] == jtag) break;
+          if (m < np) {
+            allflags[jj] = 1;
+            memcpy(&allvalues[nn],&valuepartner[i][dnum*m],dnumbytes);
+          } else {
+            allflags[jj] = 0;
+            memcpy(&allvalues[nn],zeroes,dnumbytes);
+          }
+        } else {
+          allflags[jj] = 0;
+          memcpy(&allvalues[nn],zeroes,dnumbytes);
+        }
+        nn += dnum;
+      }
+    }
+  }
+}
diff --git a/src/USER-OMP/fix_shear_history_omp.h b/src/USER-OMP/fix_neigh_history_omp.h
similarity index 64%
rename from src/USER-OMP/fix_shear_history_omp.h
rename to src/USER-OMP/fix_neigh_history_omp.h
index 95281b2afc56b5b5bbbc3f946ce61ef5a6998f34..9cd97ce3da76db17ca49727d433028971790b550 100644
--- a/src/USER-OMP/fix_shear_history_omp.h
+++ b/src/USER-OMP/fix_neigh_history_omp.h
@@ -13,23 +13,25 @@
 
 #ifdef FIX_CLASS
 
-FixStyle(SHEAR_HISTORY/omp,FixShearHistoryOMP)
+FixStyle(NEIGH_HISTORY/omp,FixNeighHistoryOMP)
 
 #else
 
-#ifndef LMP_FIX_SHEAR_HISTORY_OMP_H
-#define LMP_FIX_SHEAR_HISTORY_OMP_H
+#ifndef LMP_FIX_NEIGH_HISTORY_OMP_H
+#define LMP_FIX_NEIGH_HISTORY_OMP_H
 
-#include "fix_shear_history.h"
+#include "fix_neigh_history.h"
 
 namespace LAMMPS_NS {
 
-class FixShearHistoryOMP : public FixShearHistory {
+class FixNeighHistoryOMP : public FixNeighHistory {
 
  public:
-  FixShearHistoryOMP(class LAMMPS *lmp, int narg, char **argv)
-    : FixShearHistory(lmp,narg,argv) {};
-  virtual void pre_exchange();
+  FixNeighHistoryOMP(class LAMMPS *lmp, int narg, char **argv);
+  void pre_exchange_onesided();   // OpenMP version, see fix_neigh_history_omp.cpp
+  void pre_exchange_newton();     // OpenMP version, see fix_neigh_history_omp.cpp
+  void pre_exchange_no_newton();  // OpenMP version, see fix_neigh_history_omp.cpp
+  void post_neighbor();           // OpenMP version, see fix_neigh_history_omp.cpp
 };
 
 }
diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp
index 4457ab6592b90f246426416d42450284423daef5..d89c9627fe30a79d2b6d3ee2e3052fe322becf35 100644
--- a/src/USER-OMP/fix_qeq_reax_omp.cpp
+++ b/src/USER-OMP/fix_qeq_reax_omp.cpp
@@ -703,7 +703,7 @@ void FixQEqReaxOMP::calculate_Q()
       q[i] = s[i] - u * t[i];
 
       // backup s & t
-      for (int k = 4; k > 0; --k) {
+      for (int k = nprev-1; k > 0; --k) {
         s_hist[i][k] = s_hist[i][k-1];
         t_hist[i][k] = t_hist[i][k-1];
       }
diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp
deleted file mode 100644
index 4180e0af41ada456b73db4a6973bd2dff5c2c766..0000000000000000000000000000000000000000
--- a/src/USER-OMP/fix_shear_history_omp.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include <string.h>
-#include <stdio.h>
-#include "fix_shear_history_omp.h"
-#include "atom.h"
-#include "comm.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "force.h"
-#include "pair.h"
-#include "update.h"
-#include "modify.h"
-#include "error.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-using namespace LAMMPS_NS;
-using namespace FixConst;
-
-/* ----------------------------------------------------------------------
-   copy shear partner info from neighbor lists to per-atom arrays
-   so it can be exchanged with those atoms
-------------------------------------------------------------------------- */
-
-void FixShearHistoryOMP::pre_exchange()
-{
-  const int nthreads = comm->nthreads;
-  maxtouch = 0;
-
-#if defined(_OPENMP)
-#pragma omp parallel default(none)
-#endif
-  {
-
-#if defined(_OPENMP)
-    const int tid = omp_get_thread_num();
-#else
-    const int tid = 0;
-#endif
-
-    int i,j,ii,jj,m,n,inum,jnum;
-    int *ilist,*jlist,*numneigh,**firstneigh;
-    int *touch,**firsttouch;
-    double *shear,*shearj,*allshear,**firstshear;
-
-    MyPage <tagint> &ipg = ipage[tid];
-    MyPage <double> &dpg = dpage[tid];
-    ipg.reset();
-    dpg.reset();
-
-    // 1st loop over neighbor list
-    // calculate nparter for each owned atom
-
-    tagint *tag = atom->tag;
-
-    NeighList *list = pair->list;
-    inum = list->inum;
-    ilist = list->ilist;
-    numneigh = list->numneigh;
-    firstneigh = list->firstneigh;
-    firsttouch = list->listhistory->firstneigh;
-    firstshear = list->listhistory->firstdouble;
-
-    int nlocal_neigh = 0;
-    if (inum) nlocal_neigh = ilist[inum-1] + 1;
-
-    // each thread works on a fixed chunk of local and ghost atoms.
-    const int ldelta = 1 + nlocal_neigh/nthreads;
-    const int lfrom = tid*ldelta;
-    const int lmax = lfrom +ldelta;
-    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
-
-    // zero npartners for all current atoms and
-    // clear page data structures for this thread
-
-    for (i = lfrom; i < lto; i++) npartner[i] = 0;
-
-
-    for (ii = 0; ii < inum; ii++) {
-      i = ilist[ii];
-      jlist = firstneigh[i];
-      jnum = numneigh[i];
-      touch = firsttouch[i];
-
-      for (jj = 0; jj < jnum; jj++) {
-        if (touch[jj]) {
-          if ((i >= lfrom) && (i < lto))
-            npartner[i]++;
-
-          j = jlist[jj];
-          j &= NEIGHMASK;
-          if ((j >= lfrom) && (j < lto))
-            npartner[j]++;
-        }
-      }
-    }
-
-    // get page chunks to store atom IDs and shear history for my atoms
-
-    for (ii = 0; ii < inum; ii++) {
-      i = ilist[ii];
-      if ((i >= lfrom) && (i < lto)) {
-        n = npartner[i];
-        partner[i] = ipg.get(n);
-        shearpartner[i] = dpg.get(dnum*n);
-        if (partner[i] == NULL || shearpartner[i] == NULL)
-          error->one(FLERR,"Shear history overflow, boost neigh_modify one");
-      }
-    }
-
-    // 2nd loop over neighbor list
-    // store atom IDs and shear history for my atoms
-    // re-zero npartner to use as counter for all my atoms
-
-    for (i = lfrom; i < lto; i++) npartner[i] = 0;
-
-    for (ii = 0; ii < inum; ii++) {
-      i = ilist[ii];
-      jlist = firstneigh[i];
-      allshear = firstshear[i];
-      jnum = numneigh[i];
-      touch = firsttouch[i];
-
-      for (jj = 0; jj < jnum; jj++) {
-        if (touch[jj]) {
-          shear = &allshear[3*jj];
-          j = jlist[jj];
-          j &= NEIGHMASK;
-
-          if ((i >= lfrom) && (i < lto)) {
-            m = npartner[i]++;
-            partner[i][m] = tag[j];
-            memcpy(&shearpartner[i][dnum*m],shear,dnumbytes);
-          }
-
-          if ((j >= lfrom) && (j < lto)) {
-            m = npartner[j]++;
-            partner[j][m] = tag[i];
-            shearj = &shearpartner[j][dnum*m];
-            for (n = 0; n < dnum; n++) shearj[n] = -shear[n];
-          }
-        }
-      }
-    }
-
-    // set maxtouch = max # of partners of any owned atom
-    maxtouch = m = 0;
-    for (i = lfrom; i < lto; i++)
-      m = MAX(m,npartner[i]);
-
-#if defined(_OPENMP)
-#pragma omp critical
-#endif
-    maxtouch = MAX(m,maxtouch);
-  }
-}
diff --git a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp
index 45add87092de8fcc505624dad338e0328af90d05..f094691b71da6e7dc291bde662a88251f79e796a 100644
--- a/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp
+++ b/src/USER-OMP/npair_half_respa_bin_newtoff_omp.cpp
@@ -45,12 +45,10 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list)
 
   NPAIR_OMP_INIT;
 
-  NeighList *listinner = list->listinner;
-  NeighList *listmiddle = list->listmiddle;
   const int respamiddle = list->respamiddle;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listinner,listmiddle)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
@@ -77,26 +75,26 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list)
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
 
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   if (respamiddle) {
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
   }
 
   // each thread has its own page allocator
   MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = listinner->ipage[tid];
+  MyPage<int> &ipage_inner = list->ipage_inner[tid];
   ipage.reset();
   ipage_inner.reset();
 
   MyPage<int> *ipage_middle;
   if (respamiddle) {
-    ipage_middle = listmiddle->ipage + tid;
+    ipage_middle = list->ipage_middle + tid;
     ipage_middle->reset();
   }
 
@@ -199,6 +197,6 @@ void NPairHalfRespaBinNewtoffOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  listinner->inum = nlocal;
-  if (respamiddle) listmiddle->inum = nlocal;
+  list->inum_inner = nlocal;
+  if (respamiddle) list->inum_middle = nlocal;
 }
diff --git a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp
index ee6b9b7501235f8bdcd425b4eb9340be286980aa..de7ef5f7d52b4b04d11dde4a16212de34dee8941 100644
--- a/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp
+++ b/src/USER-OMP/npair_half_respa_bin_newton_omp.cpp
@@ -44,12 +44,10 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list)
 
   NPAIR_OMP_INIT;
 
-  NeighList *listinner = list->listinner;
-  NeighList *listmiddle = list->listmiddle;
   const int respamiddle = list->respamiddle;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listinner,listmiddle)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
@@ -76,26 +74,26 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list)
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
 
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   if (respamiddle) {
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
   }
 
   // each thread has its own page allocator
   MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = listinner->ipage[tid];
+  MyPage<int> &ipage_inner = list->ipage_inner[tid];
   ipage.reset();
   ipage_inner.reset();
 
   MyPage<int> *ipage_middle;
   if (respamiddle) {
-    ipage_middle = listmiddle->ipage + tid;
+    ipage_middle = list->ipage_middle + tid;
     ipage_middle->reset();
   }
 
@@ -245,6 +243,6 @@ void NPairHalfRespaBinNewtonOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  listinner->inum = nlocal;
-  if (respamiddle) listmiddle->inum = nlocal;
+  list->inum_inner = nlocal;
+  if (respamiddle) list->inum_middle = nlocal;
 }
diff --git a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp
index fbb512ba646ff5a23201507867d6d84dc4e4ac90..f20d101bc9dbc1f131e5a7435751ed471626266a 100644
--- a/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp
+++ b/src/USER-OMP/npair_half_respa_bin_newton_tri_omp.cpp
@@ -44,12 +44,10 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
 
   NPAIR_OMP_INIT;
 
-  NeighList *listinner = list->listinner;
-  NeighList *listmiddle = list->listmiddle;
   const int respamiddle = list->respamiddle;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listinner,listmiddle)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
@@ -76,26 +74,26 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
 
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   if (respamiddle) {
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
   }
 
   // each thread has its own page allocator
   MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = listinner->ipage[tid];
+  MyPage<int> &ipage_inner = list->ipage_inner[tid];
   ipage.reset();
   ipage_inner.reset();
 
   MyPage<int> *ipage_middle;
   if (respamiddle) {
-    ipage_middle = listmiddle->ipage + tid;
+    ipage_middle = list->ipage_middle + tid;
     ipage_middle->reset();
   }
 
@@ -206,6 +204,6 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  listinner->inum = nlocal;
-  if (respamiddle) listmiddle->inum = nlocal;
+  list->inum_inner = nlocal;
+  if (respamiddle) list->inum_middle = nlocal;
 }
diff --git a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp
index 5ee71bebad9e5668461dcc2b9702eccad7df774f..0f726cdd7f322dafa176884f1510134a2ce5e6a3 100644
--- a/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp
+++ b/src/USER-OMP/npair_half_respa_nsq_newtoff_omp.cpp
@@ -46,12 +46,10 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list)
 
   NPAIR_OMP_INIT;
 
-  NeighList *listinner = list->listinner;
-  NeighList *listmiddle = list->listmiddle;
   const int respamiddle = list->respamiddle;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listinner,listmiddle)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
@@ -80,26 +78,26 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list)
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
 
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   if (respamiddle) {
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
   }
 
   // each thread has its own page allocator
   MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = listinner->ipage[tid];
+  MyPage<int> &ipage_inner = list->ipage_inner[tid];
   ipage.reset();
   ipage_inner.reset();
 
   MyPage<int> *ipage_middle;
   if (respamiddle) {
-    ipage_middle = listmiddle->ipage + tid;
+    ipage_middle = list->ipage_middle + tid;
     ipage_middle->reset();
   }
 
@@ -193,6 +191,6 @@ void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  listinner->inum = nlocal;
-  if (respamiddle) listmiddle->inum = nlocal;
+  list->inum_inner = nlocal;
+  if (respamiddle) list->inum_middle = nlocal;
 }
diff --git a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp
index 89cff732c950bffa515a7a691cd977dfdb26c378..2783e1255eb1147a15c5ebc04a41905c0a6ac27f 100644
--- a/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp
+++ b/src/USER-OMP/npair_half_respa_nsq_newton_omp.cpp
@@ -47,12 +47,10 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
 
   NPAIR_OMP_INIT;
 
-  NeighList *listinner = list->listinner;
-  NeighList *listmiddle = list->listmiddle;
   const int respamiddle = list->respamiddle;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listinner,listmiddle)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
@@ -81,26 +79,26 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
 
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   if (respamiddle) {
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
   }
 
   // each thread has its own page allocator
   MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = listinner->ipage[tid];
+  MyPage<int> &ipage_inner = list->ipage_inner[tid];
   ipage.reset();
   ipage_inner.reset();
 
   MyPage<int> *ipage_middle;
   if (respamiddle) {
-    ipage_middle = listmiddle->ipage + tid;
+    ipage_middle = list->ipage_middle + tid;
     ipage_middle->reset();
   }
 
@@ -212,6 +210,6 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  listinner->inum = nlocal;
-  if (respamiddle) listmiddle->inum = nlocal;
+  list->inum_inner = nlocal;
+  if (respamiddle) list->inum_middle = nlocal;
 }
diff --git a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp
index 120658b7143f5fe72a97e066e1972d7fdf836187..6a1cb46ea6272d9c312c8c555b74fdbf2eebdbd7 100644
--- a/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp
+++ b/src/USER-OMP/npair_half_size_bin_newtoff_omp.cpp
@@ -18,9 +18,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -34,7 +31,6 @@ NPairHalfSizeBinNewtoffOmp::NPairHalfSizeBinNewtoffOmp(LAMMPS *lmp) :
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with partial Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    each owned atom i checks own bin and surrounding bins in non-Newton stencil
    pair stored once if i,j are both owned and i < j
    pair stored by me if j is ghost (also stored by proc owning j)
@@ -43,30 +39,20 @@ NPairHalfSizeBinNewtoffOmp::NPairHalfSizeBinNewtoffOmp(LAMMPS *lmp) :
 void NPairHalfSizeBinNewtoffOmp::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-
-  FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history;
-  NeighList * listhistory = list->listhistory;
+  const int history = list->history;
+  const int mask_history = 3 << SBBITS;
 
   NPAIR_OMP_INIT;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listhistory)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
+  int i,j,k,m,n,nn,ibin;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
+  int *neighptr;
 
   // loop over each atom, storing neighbors
 
@@ -85,29 +71,10 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list)
   MyPage<int> &ipage = list->ipage[tid];
   ipage.reset();
 
-  if (fix_history) {
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage+tid;
-    dpage_shear = listhistory->dpage+tid;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
-
   for (i = ifrom; i < ito; i++) {
 
     n = 0;
     neighptr = ipage.vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -133,29 +100,10 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list)
         cutsq = (radsum+skin) * (radsum+skin);
 
         if (rsq <= cutsq) {
-          neighptr[n] = j;
-
-          if (fix_history) {
-            if (rsq < radsum*radsum) {
-              for (m = 0; m < npartner[i]; m++)
-                if (partner[i][m] == tag[j]) break;
-              if (m < npartner[i]) {
-                touchptr[n] = 1;
-                memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-                nn += dnum;
-              } else {
-                touchptr[n] = 0;
-                memcpy(&shearptr[nn],zeroes,dnumbytes);
-                nn += dnum;
-              }
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          }
-
-          n++;
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
         }
       }
     }
@@ -166,13 +114,6 @@ void NPairHalfSizeBinNewtoffOmp::build(NeighList *list)
     ipage.vgot(n);
     if (ipage.status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
diff --git a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp
index cf0c6d20fe565c4058dce2ac728854a4a36fcf2e..d8e1e6da449c9e04698dc79df919a802e907cec9 100644
--- a/src/USER-OMP/npair_half_size_bin_newton_omp.cpp
+++ b/src/USER-OMP/npair_half_size_bin_newton_omp.cpp
@@ -18,9 +18,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -34,7 +31,6 @@ NPairHalfSizeBinNewtonOmp::NPairHalfSizeBinNewtonOmp(LAMMPS *lmp) :
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with full Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    each owned atom i checks its own bin and other bins in Newton stencil
    every pair stored exactly once by some processor
 ------------------------------------------------------------------------- */
@@ -42,36 +38,20 @@ NPairHalfSizeBinNewtonOmp::NPairHalfSizeBinNewtonOmp(LAMMPS *lmp) :
 void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-
-  FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history;
-  NeighList * listhistory = list->listhistory;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-  }
+  const int history = list->history;
+  const int mask_history = 3 << SBBITS;
 
   NPAIR_OMP_INIT;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listhistory)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
+  int i,j,k,m,n,nn,ibin;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-
-  // loop over each atom, storing neighbors
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -88,29 +68,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
   MyPage<int> &ipage = list->ipage[tid];
   ipage.reset();
 
-  if (fix_history) {
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage+tid;
-    dpage_shear = listhistory->dpage+tid;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
-
   for (i = ifrom; i < ito; i++) {
 
     n = 0;
     neighptr = ipage.vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -140,29 +101,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -181,29 +123,10 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
         cutsq = (radsum+skin) * (radsum+skin);
 
         if (rsq <= cutsq) {
-          neighptr[n] = j;
-
-          if (fix_history) {
-            if (rsq < radsum*radsum) {
-              for (m = 0; m < npartner[i]; m++)
-                if (partner[i][m] == tag[j]) break;
-              if (m < npartner[i]) {
-                touchptr[n] = 1;
-                memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-                nn += dnum;
-              } else {
-                touchptr[n] = 0;
-                memcpy(&shearptr[nn],zeroes,dnumbytes);
-                nn += dnum;
-              }
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          }
-
-          n++;
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
         }
       }
     }
@@ -214,13 +137,6 @@ void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
     ipage.vgot(n);
     if (ipage.status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
diff --git a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp
index da04eebd1eb70ac0422e35b1299bc9dd04b58c33..b02bfa345e8b1b9ccceae4e55ba0c482df8e46a2 100644
--- a/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp
+++ b/src/USER-OMP/npair_half_size_bin_newton_tri_omp.cpp
@@ -17,8 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -32,7 +30,6 @@ NPairHalfSizeBinNewtonTriOmp::NPairHalfSizeBinNewtonTriOmp(LAMMPS *lmp) :
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with Newton's 3rd law for triclinic
-   no shear history is allowed for this option
    each owned atom i checks its own bin and other bins in triclinic stencil
    every pair stored exactly once by some processor
 ------------------------------------------------------------------------- */
@@ -40,6 +37,8 @@ NPairHalfSizeBinNewtonTriOmp::NPairHalfSizeBinNewtonTriOmp(LAMMPS *lmp) :
 void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int history = list->history;
+  const int mask_history = 3 << SBBITS;
 
   NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@@ -105,7 +104,12 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
         radsum = radi + radius[j];
         cutsq = (radsum+skin) * (radsum+skin);
 
-        if (rsq <= cutsq) neighptr[n++] = j;
+        if (rsq <= cutsq) {
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
+        }
       }
     }
 
diff --git a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp
index f898ec3828b20db8f258a78c6d911c46b6f4b1b1..3c7b6b118f5f9a4da6f324acb9b217097bf69605 100644
--- a/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp
+++ b/src/USER-OMP/npair_half_size_nsq_newtoff_omp.cpp
@@ -19,9 +19,6 @@
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -44,34 +41,20 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
   const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-
-  FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history;
-  NeighList * listhistory = list->listhistory;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-  }
+  const int history = list->history;
+  const int mask_history = 3 << SBBITS;
 
   NPAIR_OMP_INIT;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listhistory)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,m,n,nn,dnum,dnumbytes;
+  int i,j,m,n,nn;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -89,29 +72,10 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list)
   MyPage<int> &ipage = list->ipage[tid];
   ipage.reset();
 
-  if (fix_history) {
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage+tid;
-    dpage_shear = listhistory->dpage+tid;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
-
   for (i = ifrom; i < ito; i++) {
 
     n = 0;
     neighptr = ipage.vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -132,29 +96,10 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -164,13 +109,6 @@ void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list)
     ipage.vgot(n);
     if (ipage.status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
diff --git a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp
index a7caac372acb40684b5306a026a6fb805b3ea39e..37a4181af73dd823a9a2eea1475d3c33eb06b15a 100644
--- a/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp
+++ b/src/USER-OMP/npair_half_size_nsq_newton_omp.cpp
@@ -19,9 +19,6 @@
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -45,34 +42,20 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
   const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;;
-
-  FixShearHistory * const fix_history = (FixShearHistory *) list->fix_history;
-  NeighList * listhistory = list->listhistory;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal+atom->nghost;
-  }
+  const int history = list->history;
+  const int mask_history = 3 << SBBITS;
 
   NPAIR_OMP_INIT;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(list,listhistory)
+#pragma omp parallel default(none) shared(list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,m,n,nn,itag,jtag,dnum,dnumbytes;
+  int i,j,m,n,nn,itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -90,29 +73,10 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list)
   MyPage<int> &ipage = list->ipage[tid];
   ipage.reset();
 
-  if (fix_history) {
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage+tid;
-    dpage_shear = listhistory->dpage+tid;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
-
   for (i = ifrom; i < ito; i++) {
 
     n = 0;
     neighptr = ipage.vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     itag = tag[i];
     xtmp = x[i][0];
@@ -150,29 +114,10 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -183,12 +128,6 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list)
     if (ipage.status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
 
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
index 87f9e2e3216596e59fa753db42b51520c677c1e5..f99637240998739282d37d11286fbe09badc9c65 100644
--- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
@@ -319,7 +319,7 @@ void PairBuckLongCoulLongOMP::compute_inner()
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listinner->inum;
+  const int inum = list->inum_inner;
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
@@ -343,7 +343,7 @@ void PairBuckLongCoulLongOMP::compute_middle()
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listmiddle->inum;
+  const int inum = list->inum_middle;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
@@ -373,7 +373,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag)
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listouter->inum;
+  const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(eflag,vflag)
@@ -811,7 +811,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const t
   const double *x0 = x[0];
   double *f0 = f[0], *fi = 0;
 
-  int *ilist = listinner->ilist;
+  int *ilist = list->ilist_inner;
 
   const int newton_pair = force->newton_pair;
 
@@ -835,7 +835,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const t
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
-    jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
@@ -904,7 +904,7 @@ void PairBuckLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const
   const double *x0 = x[0];
   double *f0 = f[0], *fi = 0;
 
-  int *ilist = listmiddle->ilist;
+  int *ilist = list->ilist_middle;
 
   const int newton_pair = force->newton_pair;
 
@@ -932,7 +932,7 @@ void PairBuckLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
-    jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
@@ -1009,7 +1009,7 @@ void PairBuckLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const t
   const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
-  int *ilist = listouter->ilist;
+  int *ilist = list->ilist;
 
   int i, j, ii;
   int *jneigh, *jneighn, typei, typej, ni, respa_flag;
@@ -1035,7 +1035,7 @@ void PairBuckLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const t
     buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
-    jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
index ac40e29edf0ae4f615d799fe344bb87533d9e7d4..046b71a73aee49f91222b0b787f1c1ea4551f434 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
@@ -14,6 +14,7 @@
 
 #include <math.h>
 #include "pair_gran_hertz_history_omp.h"
+#include "fix_neigh_history.h"
 #include "atom.h"
 #include "comm.h"
 #include "fix.h"
@@ -134,8 +135,8 @@ void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = list->listhistory->firstneigh;
-  firstshear = list->listhistory->firstdouble;
+  firsttouch = fix_history->firstflag;
+  firstshear = fix_history->firstvalue;
 
   // loop over neighbors of my atoms
 
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
index e507a63f7cd425f04aa605a7056a4001e7b93372..2e7d55aff059c1e7867cb35edd03bd3d19cf7eb6 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
@@ -14,6 +14,7 @@
 
 #include <math.h>
 #include "pair_gran_hooke_history_omp.h"
+#include "fix_neigh_history.h"
 #include "atom.h"
 #include "comm.h"
 #include "fix.h"
@@ -137,8 +138,8 @@ void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = listhistory->firstneigh;
-  firstshear = listhistory->firstdouble;
+  firsttouch = fix_history->firstflag;
+  firstshear = fix_history->firstvalue;
 
   // loop over neighbors of my atoms
 
diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
index 28d4f229c8c25b0ced8b4d4578fd0468b2c7442f..c0c87e74815896d4282a556035464d0559129d40 100644
--- a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
@@ -317,7 +317,7 @@ void PairLJLongCoulLongOMP::compute_inner()
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listinner->inum;
+  const int inum = list->inum_inner;
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
@@ -341,7 +341,7 @@ void PairLJLongCoulLongOMP::compute_middle()
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listmiddle->inum;
+  const int inum = list->inum_middle;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
@@ -371,7 +371,7 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag)
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listouter->inum;
+  const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(eflag,vflag)
@@ -805,7 +805,7 @@ void PairLJLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const thr
   const double *x0 = x[0];
   double *f0 = f[0], *fi = 0;
 
-  int *ilist = listinner->ilist;
+  int *ilist = list->ilist_inner;
 
   const int newton_pair = force->newton_pair;
 
@@ -828,7 +828,7 @@ void PairLJLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const thr
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
-    jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_inner[i])+list->numneigh_inner[i];
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
@@ -896,7 +896,7 @@ void PairLJLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const th
   const double *x0 = x[0];
   double *f0 = f[0], *fi = 0;
 
-  int *ilist = listmiddle->ilist;
+  int *ilist = list->ilist_middle;
 
   const int newton_pair = force->newton_pair;
 
@@ -925,7 +925,7 @@ void PairLJLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const th
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
-    jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
+    jneighn = (jneigh = list->firstneigh_middle[i])+list->numneigh_middle[i];
 
     for (; jneigh<jneighn; ++jneigh) {
       j = *jneigh;
@@ -1000,7 +1000,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
   const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
-  int *ilist = listouter->ilist;
+  int *ilist = list->ilist;
 
   int i, j, ii;
   int *jneigh, *jneighn, typei, typej, ni, respa_flag;
@@ -1027,7 +1027,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
-    jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
index 1c8f60d7dcb1c141d69e370fdff8bf0f5aef09d4..c6490b08243254def01c983c0c223dd2fbfb347d 100644
--- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
@@ -379,7 +379,7 @@ void PairLJLongTIP4PLongOMP::compute_inner()
   for (i = 0; i < nall; i++) hneigh_thr[i].t = 0;
 
   const int nthreads = comm->nthreads;
-  const int inum = listinner->inum;
+  const int inum = list->inum_inner;
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
@@ -403,7 +403,7 @@ void PairLJLongTIP4PLongOMP::compute_middle()
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
-  const int inum = listmiddle->inum;
+  const int inum = list->inum_middle;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
@@ -457,7 +457,7 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag)
   }
 
   const int nthreads = comm->nthreads;
-  const int inum = listouter->inum;
+  const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(eflag,vflag)
@@ -1126,9 +1126,9 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th
 
   double *lj1i, *lj2i;
 
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   // loop over neighbors of my atoms
 
@@ -1388,9 +1388,9 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t
   int ni;
   double *lj1i, *lj2i;
 
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   // loop over neighbors of my atoms
 
@@ -1656,9 +1656,9 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th
 
   double fxtmp,fytmp,fztmp;
 
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp
index 738538a20935ffee2d8077a7713c6bab34b5f0c5..aa4aa65a4fcefec8bc143367d63347306c1c3bc6 100644
--- a/src/USER-OMP/respa_omp.cpp
+++ b/src/USER-OMP/respa_omp.cpp
@@ -108,6 +108,7 @@ void RespaOMP::setup()
   domain->box_too_small_check();
   modify->setup_pre_neighbor();
   neighbor->build();
+  modify->setup_post_neighbor();
   neighbor->ncalls = 0;
 
   // compute all forces
@@ -200,6 +201,7 @@ void RespaOMP::setup_minimal(int flag)
     domain->box_too_small_check();
     modify->setup_pre_neighbor();
     neighbor->build();
+    modify->setup_post_neighbor();
     neighbor->ncalls = 0;
   }
 
@@ -311,6 +313,10 @@ void RespaOMP::recurse(int ilevel)
         }
         neighbor->build();
         timer->stamp(Timer::NEIGH);
+        if (modify->n_post_neighbor) {
+          modify->post_neighbor();
+          timer->stamp(Timer::MODIFY);
+        }
       } else if (ilevel == 0) {
         timer->stamp();
         comm->forward_comm();
diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp
index 9d165f3fd3bbb7903cc1adaa2a592715d4d828f0..d1c4f907714ccce73e048d2b6611988a8a9f54dd 100644
--- a/src/USER-REAXC/fix_qeq_reax.cpp
+++ b/src/USER-REAXC/fix_qeq_reax.cpp
@@ -95,7 +95,7 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
   pack_flag = 0;
   s = NULL;
   t = NULL;
-  nprev = 5;
+  nprev = 4;
 
   Hdia_inv = NULL;
   b_s = NULL;
@@ -817,7 +817,7 @@ void FixQEqReax::calculate_Q()
       q[i] = s[i] - u * t[i];
 
       /* backup s & t */
-      for (k = 4; k > 0; --k) {
+      for (k = nprev-1; k > 0; --k) {
         s_hist[i][k] = s_hist[i][k-1];
         t_hist[i][k] = t_hist[i][k-1];
       }
diff --git a/src/USER-UEF/README b/src/USER-UEF/README
new file mode 100644
index 0000000000000000000000000000000000000000..92b2cee5e32877fbd41e03dbf675c4020690b384
--- /dev/null
+++ b/src/USER-UEF/README
@@ -0,0 +1,47 @@
+USER-UEF is a LAMMPS package for non-equilibrium molecular dynamics
+(NEMD) under diagonal flow fields, including uniaxial and biaxial 
+flow. With this package, simulations under extensional flow may be 
+carried out for an indefinite amount of time. It is an implementation
+of the boundary conditions developed by Matthew Dobson, and also uses
+numerical lattice reduction as was proposed by Thomas Hunt. The 
+lattice reduction algorithm is from Igor Semaev. The package is 
+intended for simulations of homogeneous flows, and integrates the 
+SLLOD equations of motion.
+
+--
+
+This package was created by David Nicholson (davidanich@gmail.com)
+at the Massachusetts Institute of Technology.
+
+--
+
+The following commands are contained in this package:
+
+fix npt/uef and fix nvt/uef:
+  These commands perform time-integration of the SLLOD equations of
+  motion under constant temperature/pressure with the proper 
+  boundary conditions for extensional flow fields.
+
+compute pressure/uef and compute temp/uef:
+  These commands allow for the evaluation of pressure/ke tensors in
+  the reference frame corresponding to the applied flow field.
+
+dump cfg/uef:
+  This command dumps coordinates in the reference frame corresponding
+  to the applied flow field.
+  
+For more information, visit the documentation page for fix nvt/uef 
+and examine the example scripts in doc/USER/uef/.
+
+--
+
+References:
+
+I. Semaev, Cryptography and Lattices, 181 (2001).
+
+M. Dobson, J. Chem. Phys., 141, 184103 (2014).
+
+T.A. Hunt, Mol. Simul., 42, 347 (2016).
+
+D.A. Nicholson, G.C. Rutledge, J. Chem. Phys., 145, 244903 (2016).
+
diff --git a/src/USER-UEF/compute_pressure_uef.cpp b/src/USER-UEF/compute_pressure_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..43054d7d3f9652f1770361596fa8c62d954ff898
--- /dev/null
+++ b/src/USER-UEF/compute_pressure_uef.cpp
@@ -0,0 +1,195 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#include <mpi.h>
+#include <string.h>
+#include <stdlib.h>
+#include "compute_pressure_uef.h"
+#include "fix_nh_uef.h"
+#include "update.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ----------------------------------------------------------------------
+ * Default values for the ext flags
+ * ----------------------------------------------------------------------*/
+ComputePressureUef::ComputePressureUef(LAMMPS *lmp, int narg, char **arg) :
+  ComputePressure(lmp, narg, arg)
+{
+  // by default all three diagonal components enter the scalar average
+  for (int d = 0; d < 3; ++d)
+    ext_flags[d] = true;
+  in_fix = false;
+}
+
+/* ----------------------------------------------------------------------
+ *  Check for the uef fix
+ * ----------------------------------------------------------------------*/
+void ComputePressureUef::init()
+{
+  ComputePressure::init();
+  // find the first fix of style nvt/uef or npt/uef and remember its index
+  // in ifix_uef (search pattern borrowed from Pieter's nvt/sllod code)
+  int i=0;
+  for (i=0; i<modify->nfix; i++) {
+    if (strcmp(modify->fix[i]->style,"nvt/uef")==0)
+      break;
+    if (strcmp(modify->fix[i]->style,"npt/uef")==0)
+      break;
+  }
+  if (i==modify->nfix)
+    error->all(FLERR,"Can't use compute pressure/uef without defining a fix nvt/npt/uef");
+  ifix_uef=i;
+  ((FixNHUef*) modify->fix[ifix_uef])->get_ext_flags(ext_flags);
+
+  if (strcmp(temperature->style,"temp/uef") != 0)
+    error->warning(FLERR,"The temperature used in compute pressure/uef is not of style temp/uef");
+}
+
+/* ----------------------------------------------------------------------
+ *  Compute pressure in the directions i corresponding to ext_flags[i]=true
+ * ----------------------------------------------------------------------*/
+double ComputePressureUef::compute_scalar()
+{
+  // the temperature compute is always evaluated first
+  temperature->compute_scalar();
+
+  // when all three diagonal components are flagged, the parent class
+  // scalar pressure is returned unchanged
+  const bool all_ext = ext_flags[0] && ext_flags[1] && ext_flags[2];
+  if (all_ext)
+    return ComputePressure::compute_scalar();
+
+  // otherwise evaluate the full tensor and average only the flagged
+  // diagonal entries; ntimestep+1 is handed to addstep() as before
+  compute_vector();
+  addstep(update->ntimestep+1);
+
+  // accumulate the selected components in x, y, z order
+  int nflagged = 0;
+  scalar = 0;
+  for (int d = 0; d < 3; ++d) {
+    if (!ext_flags[d]) continue;
+    scalar += vector[d];
+    nflagged++;
+  }
+
+  // NOTE(review): nflagged is assumed to be nonzero here, i.e. the fix
+  // always flags at least one direction -- confirm against FixNHUef
+  scalar /= nflagged;
+  return scalar;
+}
+
+/* ----------------------------------------------------------------------
+   Compute the pressure tensor in the rotated coordinate system
+------------------------------------------------------------------------- */
+void ComputePressureUef::compute_vector()
+{
+  invoked_vector = update->ntimestep;
+  if (update->vflag_global != invoked_vector)
+    error->all(FLERR,"Virial was not tallied on needed timestep");
+
+  if (force->kspace && kspace_virial && force->kspace->scalar_pressure_flag)
+    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' for "
+               "tensor components with kspace_style msm");
+
+  // invoke temperature if it hasn't been already
+  // (ke_tensor is assigned and read only when keflag is set)
+  double *ke_tensor;
+  if (keflag) {
+    if (temperature->invoked_vector != update->ntimestep)
+      temperature->compute_vector();
+    ke_tensor = temperature->vector;
+  }
+  // 3d: rotate the virial, then combine it with the ke tensor as given
+  if (dimension == 3) {
+    inv_volume = 1.0 / (domain->xprd * domain->yprd * domain->zprd);
+    virial_compute(6,3);
+    if (in_fix)                 // use the matrix stored by update_rot()
+      virial_rot(virial,rot);
+    else
+    {                           // otherwise ask the fix for its matrix now
+      double r[3][3];
+      ( (FixNHUef*) modify->fix[ifix_uef])->get_rot(r);
+      virial_rot(virial,r);
+    }
+    if (keflag) {               // only the virial is rotated in this function
+      for (int i = 0; i < 6; i++)
+        vector[i] = (ke_tensor[i] + virial[i]) * inv_volume * nktv2p;
+    } else
+      for (int i = 0; i < 6; i++)
+        vector[i] = virial[i] * inv_volume * nktv2p;
+  } else {                      // not 3d: no rotation is applied
+    inv_volume = 1.0 / (domain->xprd * domain->yprd);
+    virial_compute(4,2);
+    if (keflag) {
+      vector[0] = (ke_tensor[0] + virial[0]) * inv_volume * nktv2p;
+      vector[1] = (ke_tensor[1] + virial[1]) * inv_volume * nktv2p;
+      vector[3] = (ke_tensor[3] + virial[3]) * inv_volume * nktv2p;
+      vector[2] = vector[4] = vector[5] = 0.0;
+    } else {
+      vector[0] = virial[0] * inv_volume * nktv2p;
+      vector[1] = virial[1] * inv_volume * nktv2p;
+      vector[3] = virial[3] * inv_volume * nktv2p;
+      vector[2] = vector[4] = vector[5] = 0.0;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+ * get the current rotation matrix and store it
+------------------------------------------------------------------------- */
+void ComputePressureUef::update_rot()
+{
+    ( (FixNHUef*) modify->fix[ifix_uef])->get_rot(rot); // fills the member rot
+}
+
+/* ----------------------------------------------------------------------
+   Transform the pressure tensor to the rotated coordinate system
+   [P]rot = Q.[P].Q^t
+------------------------------------------------------------------------- */
+void ComputePressureUef::virial_rot(double *x, const double r[3][3])
+{
+  // x: symmetric tensor stored as 6 values, laid out as in the middle matrix
+  double t[3][3];
+  // product evaluated below, left to right: (r transposed) (x) (r)
+  // [00 10 20 ] [ 0 3 4 ] [00 01 02 ]
+  // [01 11 21 ] [ 3 1 5 ] [10 11 12 ]
+  // [02 12 22 ] [ 4 5 2 ] [20 21 22 ]
+  // first pass: t = (x)(r)
+  for (int k = 0; k<3; ++k)
+  {
+    t[0][k] = x[0]*r[0][k] + x[3]*r[1][k] + x[4]*r[2][k];
+    t[1][k] = x[3]*r[0][k] + x[1]*r[1][k] + x[5]*r[2][k];
+    t[2][k] = x[4]*r[0][k] + x[5]*r[1][k] + x[2]*r[2][k];
+  }
+  x[0] = r[0][0]*t[0][0] + r[1][0]*t[1][0] + r[2][0]*t[2][0]; // x is only read above, so overwriting is safe
+  x[3] = r[0][0]*t[0][1] + r[1][0]*t[1][1] + r[2][0]*t[2][1];
+  x[4] = r[0][0]*t[0][2] + r[1][0]*t[1][2] + r[2][0]*t[2][2];
+  x[1] = r[0][1]*t[0][1] + r[1][1]*t[1][1] + r[2][1]*t[2][1];
+  x[5] = r[0][1]*t[0][2] + r[1][1]*t[1][2] + r[2][1]*t[2][2];
+  x[2] = r[0][2]*t[0][2] + r[1][2]*t[1][2] + r[2][2]*t[2][2];
+}
diff --git a/src/USER-UEF/compute_pressure_uef.h b/src/USER-UEF/compute_pressure_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3a4d3195cdaa6845c880cf0d216488f482c25e5
--- /dev/null
+++ b/src/USER-UEF/compute_pressure_uef.h
@@ -0,0 +1,66 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+
+ComputeStyle(pressure/uef,ComputePressureUef)
+
+#else
+
+#ifndef LMP_COMPUTE_PRESSURE_UEF_H
+#define LMP_COMPUTE_PRESSURE_UEF_H
+
+#include "compute_pressure.h"
+
+namespace LAMMPS_NS {
+
+class ComputePressureUef : public ComputePressure {
+ public:
+  ComputePressureUef(class LAMMPS *, int, char **);
+  virtual ~ComputePressureUef(){}
+  virtual void init();             // locates the nvt/uef or npt/uef fix
+  virtual void compute_vector();   // pressure tensor; virial rotated in 3d
+  virtual double compute_scalar(); // average over the flagged diagonal terms
+  void update_rot();               // copies the fix's matrix into rot
+  bool in_fix; //true if this compute is used in fix/nvt/npt
+
+
+ protected:
+  bool ext_flags[3]; // true if used in average output pressure
+  void virial_rot(double*,const double[3][3]); // rotates a 6-value tensor in place
+  int ifix_uef;      // index into modify->fix of the uef fix found by init()
+  double rot[3][3];  // filled by update_rot(); used when in_fix is true
+};
+
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+This class inherits most of the warnings from ComputePressure. The
+only additions are:
+
+E: Can't use compute pressure/uef without defining a fix nvt/npt/uef
+
+Self-explanatory.
+
+W: The temperature used in compute pressure/uef is not of style temp/uef
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-UEF/compute_temp_uef.cpp b/src/USER-UEF/compute_temp_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..605552405611711327b9fd0a7409b7edbec70eca
--- /dev/null
+++ b/src/USER-UEF/compute_temp_uef.cpp
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#include <string.h>
+#include <stdlib.h>
+#include "compute_temp_uef.h"
+#include "fix_nh_uef.h"
+#include "update.h"
+#include "modify.h"
+#include "fix.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ----------------------------------------------------------------------
+ * Base constructor initialized to use rotation matrix
+ * ----------------------------------------------------------------------*/
+ComputeTempUef::ComputeTempUef(LAMMPS *lmp, int narg, char **arg) :
+  ComputeTemp(lmp, narg, arg),
+  rot_flag(true)   // compute_vector() rotates its result by default
+{
+}
+
+/* ----------------------------------------------------------------------
+ *  Check for the uef fix
+ * ----------------------------------------------------------------------*/
+void ComputeTempUef::init()
+{
+  ComputeTemp::init();
+
+  // scan the fix list for the first fix of style nvt/uef or npt/uef;
+  // search pattern borrowed from Pieter's nvt/sllod code
+  int idx = 0;
+  while (idx < modify->nfix) {
+    const char *fstyle = modify->fix[idx]->style;
+    if (strcmp(fstyle,"nvt/uef") == 0 || strcmp(fstyle,"npt/uef") == 0) break;
+    ++idx;
+  }
+  if (idx == modify->nfix)
+    error->all(FLERR,"Can't use compute temp/uef without defining a fix nvt/npt/uef");
+  ifix_uef = idx;
+}
+
+
+/* ----------------------------------------------------------------------
+   Compute the ke tensor in the proper coordinate system
+------------------------------------------------------------------------- */
+void ComputeTempUef::compute_vector()
+{
+  // run the parent computation, then rotate `vector` unless rotation is off
+  ComputeTemp::compute_vector();
+  if (!rot_flag) return;
+  double rmat[3][3];
+  FixNHUef *uef = (FixNHUef*) modify->fix[ifix_uef];
+  uef->get_rot(rmat);
+  virial_rot(vector,rmat);
+}
+
+/* ----------------------------------------------------------------------
+ * turn the rotation matrix on or off to properly account for the
+ * coordinate system of the velocities
+------------------------------------------------------------------------- */
+void ComputeTempUef::yes_rot()
+{
+  rot_flag =true;   // compute_vector() will rotate its result
+}
+void ComputeTempUef::no_rot()
+{
+  rot_flag =false;  // compute_vector() leaves the parent result unrotated
+}
+
+/* ----------------------------------------------------------------------
+   Transform the ke tensor to the rotated coordinate system
+   [P]rot = Q.[P].Q^t
+------------------------------------------------------------------------- */
+void ComputeTempUef::virial_rot(double *x, const double r[3][3])
+{
+  // x: symmetric tensor stored as 6 values, laid out as in the middle matrix
+  double t[3][3];
+  // [00 10 20 ] [ 0 3 4 ] [00 01 02 ]
+  // [01 11 21 ] [ 3 1 5 ] [10 11 12 ]
+  // [02 12 22 ] [ 4 5 2 ] [20 21 22 ]
+  for (int k = 0; k<3; ++k) {   // first pass: t = (x)(r)
+    t[0][k] = x[0]*r[0][k] + x[3]*r[1][k] + x[4]*r[2][k];
+    t[1][k] = x[3]*r[0][k] + x[1]*r[1][k] + x[5]*r[2][k];
+    t[2][k] = x[4]*r[0][k] + x[5]*r[1][k] + x[2]*r[2][k];
+  }
+  x[0] = r[0][0]*t[0][0] + r[1][0]*t[1][0] + r[2][0]*t[2][0]; // x is only read above, so overwriting is safe
+  x[3] = r[0][0]*t[0][1] + r[1][0]*t[1][1] + r[2][0]*t[2][1];
+  x[4] = r[0][0]*t[0][2] + r[1][0]*t[1][2] + r[2][0]*t[2][2];
+  x[1] = r[0][1]*t[0][1] + r[1][1]*t[1][1] + r[2][1]*t[2][1];
+  x[5] = r[0][1]*t[0][2] + r[1][1]*t[1][2] + r[2][1]*t[2][2];
+  x[2] = r[0][2]*t[0][2] + r[1][2]*t[1][2] + r[2][2]*t[2][2];
+}
diff --git a/src/USER-UEF/compute_temp_uef.h b/src/USER-UEF/compute_temp_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..460e2b18c0fc9f1348b654692d7c24515cda5fa8
--- /dev/null
+++ b/src/USER-UEF/compute_temp_uef.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+
+ComputeStyle(temp/uef,ComputeTempUef)
+
+#else
+
+#ifndef LMP_COMPUTE_TEMP_UEF_H
+#define LMP_COMPUTE_TEMP_UEF_H
+
+#include "compute_temp.h"
+
+namespace LAMMPS_NS {
+
+// Compute style temp/uef: a ComputeTemp whose vector output can be rotated
+class ComputeTempUef : public ComputeTemp {
+ public:
+  ComputeTempUef(class LAMMPS *, int, char **);
+  virtual ~ComputeTempUef(){}
+  virtual void init();
+  virtual void compute_vector();   // base result, rotated when rot_flag is set
+  void yes_rot();                  // sets rot_flag = true
+  void no_rot();                   // sets rot_flag = false
+
+ protected:
+  bool rot_flag;                   // whether compute_vector() rotates
+  void virial_rot(double*,const double[3][3]);  // in-place tensor rotation
+  int ifix_uef;                    // index into modify->fix of the uef fix
+};
+
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+This class inherits most of the warnings from ComputeTemp. The
+only addition is:
+
+E: Can't use compute temp/uef without defining a fix nvt/npt/uef
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-UEF/dump_cfg_uef.cpp b/src/USER-UEF/dump_cfg_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4b0c08275a62a7bb78132fd2ab6a138a0670d11d
--- /dev/null
+++ b/src/USER-UEF/dump_cfg_uef.cpp
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing Author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "dump_cfg.h"
+#include "atom.h"
+#include "domain.h"
+#include "modify.h"
+#include "compute.h"
+#include "fix.h"
+#include "error.h"
+#include "uef_utils.h"
+#include "dump_cfg_uef.h"
+#include "fix_nh_uef.h"
+
+using namespace LAMMPS_NS;
+
+enum{INT,DOUBLE,STRING,BIGINT};   // same as in DumpCustom
+
+#define UNWRAPEXPAND 10.0
+#define ONEFIELD 32
+#define DELTA 1048576
+
+/* ----------------------------------------------------------------------
+ * run the DumpCFG setup, then locate the nvt/uef or npt/uef fix
+ * ----------------------------------------------------------------------*/
+void DumpCFGUef::init_style()
+{
+  DumpCFG::init_style();
+
+  // remember the index of the first fix whose style is nvt/uef or npt/uef;
+  // write_header() reads the box and rotation from that fix
+  int found = -1;
+  int ifix = 0;
+  while (found < 0 && ifix < modify->nfix) {
+    const char *sty = modify->fix[ifix]->style;
+    if (strcmp(sty,"nvt/uef") == 0 || strcmp(sty,"npt/uef") == 0)
+      found = ifix;
+    ++ifix;
+  }
+  if (found < 0)
+    error->all(FLERR,"Can't use dump cfg/uef without defining a fix nvt/npt/uef");
+  ifix_uef = found;
+}
+
+/* ----------------------------------------------------------------------
+ * header writer for cfg/uef: the cell vectors come from the uef fix and
+ * are multiplied by the transpose of its rotation matrix before being
+ * written as H0; the other header lines do not depend on the uef fix.
+ * ----------------------------------------------------------------------*/
+
+void DumpCFGUef::write_header(bigint n)
+{
+  // cell matrix and rotation matrix as reported by the uef fix
+  FixNHUef *uef = (FixNHUef*) modify->fix[ifix_uef];
+  double box[3][3],rot[3][3];
+  uef->get_box(box);
+  uef->get_rot(rot);
+
+  // rot goes from "lab frame" to "upper triangular frame";
+  // its transpose takes the simulation box to the flow frame,
+  // so transpose rot in place and apply it to box
+  for (int row = 0; row < 3; ++row) {
+    for (int col = row+1; col < 3; ++col) {
+      const double upper = rot[row][col];
+      rot[row][col] = rot[col][row];
+      rot[col][row] = upper;
+    }
+  }
+  UEF_utils::mul_m2(rot,box);
+
+  // length scale used by AtomEye for CFG viz:
+  //   pre-computed PD scale factor for peridynamics,
+  //   UNWRAPEXPAND (10.0) for unwrapped coords, 1.0 otherwise
+  double scale;
+  if (atom->peri_flag) scale = atom->pdscale;
+  else if (unwrapflag == 1) scale = UNWRAPEXPAND;
+  else scale = 1.0;
+
+  // splice BIGINT_FORMAT into the format string before printing n
+  char fmt[64];
+  sprintf(fmt,"Number of particles = %s\n",BIGINT_FORMAT);
+  fprintf(fp,fmt,n);
+  fprintf(fp,"A = %g Angstrom (basic length-scale)\n",scale);
+  // in box[][] columns are cell edges; in H0, rows are cell edges
+  fprintf(fp,"H0(1,1) = %g A\n",box[0][0]);
+  fprintf(fp,"H0(1,2) = %g A\n",box[1][0]);
+  fprintf(fp,"H0(1,3) = %g A\n",box[2][0]);
+  fprintf(fp,"H0(2,1) = %g A\n",box[0][1]);
+  fprintf(fp,"H0(2,2) = %g A\n",box[1][1]);
+  fprintf(fp,"H0(2,3) = %g A\n",box[2][1]);
+  fprintf(fp,"H0(3,1) = %g A\n",box[0][2]);
+  fprintf(fp,"H0(3,2) = %g A\n",box[1][2]);
+  fprintf(fp,"H0(3,3) = %g A\n",box[2][2]);
+  fprintf(fp,".NO_VELOCITY.\n");
+  fprintf(fp,"entry_count = %d\n",nfield-2);
+  for (int iaux = 0; iaux < nfield-5; iaux++)
+    fprintf(fp,"auxiliary[%d] = %s\n",iaux,auxname[iaux]);
+}
diff --git a/src/USER-UEF/dump_cfg_uef.h b/src/USER-UEF/dump_cfg_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2881136ad38a4fc363cd9228315cd69fbc987d3
--- /dev/null
+++ b/src/USER-UEF/dump_cfg_uef.h
@@ -0,0 +1,51 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing Author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifdef DUMP_CLASS
+
+DumpStyle(cfg/uef,DumpCFGUef)
+
+#else
+
+#ifndef LMP_DUMP_CFG_UEF_H
+#define LMP_DUMP_CFG_UEF_H
+
+#include "dump_cfg.h"
+
+namespace LAMMPS_NS {
+
+class DumpCFGUef : public DumpCFG {   // dump style cfg/uef
+ public:
+  DumpCFGUef(LAMMPS *lmp, int narg, char **arg) :
+    DumpCFG(lmp, narg, arg){}         // all arguments go to DumpCFG
+  void init_style();                  // base init + lookup of the uef fix
+  void write_header(bigint);          // header with the cell from the uef fix
+
+ protected:
+  int ifix_uef;                       // index of the uef fix in modify->fix
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Can't use dump cfg/uef without defining a fix nvt/npt/uef
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dd639c34dbc3e4442a16c60875d01587f085ff1d
--- /dev/null
+++ b/src/USER-UEF/fix_nh_uef.cpp
@@ -0,0 +1,820 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "fix_nh_uef.h"
+#include "atom.h"
+#include "force.h"
+#include "group.h"
+#include "comm.h"
+#include "citeme.h"
+#include "irregular.h"
+#include "modify.h"
+#include "compute.h"
+#include "kspace.h"
+#include "update.h"
+#include "domain.h"
+#include "error.h"
+#include "output.h"
+#include "timer.h"
+#include "neighbor.h"
+#include "compute_pressure_uef.h"
+#include "compute_temp_uef.h"
+#include "uef_utils.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{ISO,ANISO,TRICLINIC};
+
+// citation info
+
+static const char cite_user_uef_package[] =
+  "USER-UEF package:\n\n"
+  "@Article{NicholsonRutledge16,\n"
+  "author = {David A. Nicholson and Gregory C. Rutledge},\n"
+  "title = {Molecular simulation of flow-enhanced nucleation in n-eicosane melts under steady shear and uniaxial extension},\n"
+  "journal = {The Journal of Chemical Physics},\n"
+  "volume = {145},\n"
+  "number = {24},\n"
+  "pages = {244903},\n"
+  "year = {2016}\n"
+  "}\n\n";
+
+/* ----------------------------------------------------------------------
+ * Parse fix specific keywords, do some error checking, and initialize
+ * the temp/uef and pressure/uef computes
+ ---------------------------------------------------------------------- */
+FixNHUef::FixNHUef(LAMMPS *lmp, int narg, char **arg) :
+  FixNH(lmp, narg, arg), uefbox(NULL)
+{
+  if (lmp->citeme) lmp->citeme->add(cite_user_uef_package);
+
+  // strain rates; the erate keyword is mandatory and overwrites these
+
+  erate[0] = erate[1] = 0;
+
+  // default values for the optional strain and ext keywords
+
+  strain[0]=strain[1]= 0;
+  ext_flags[0]=ext_flags[1]=ext_flags[2] = true;
+
+  // need to initialize these
+
+  omega_dot[0]=omega_dot[1]=omega_dot[2]=0;
+
+  // parse fix nh/uef specific options
+
+  bool erate_flag = false;
+  int iarg = 3;
+
+  while (iarg <narg) {
+    if (strcmp(arg[iarg],"erate")==0) {
+      if (iarg+3 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command");
+      erate[0] = force->numeric(FLERR,arg[iarg+1]);
+      erate[1] = force->numeric(FLERR,arg[iarg+2]);
+      erate_flag = true;
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"strain")==0) {
+      if (iarg+3 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command");
+      strain[0] = force->numeric(FLERR,arg[iarg+1]);
+      strain[1] = force->numeric(FLERR,arg[iarg+2]);
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"ext")==0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/uef command");
+      if (strcmp(arg[iarg+1],"x")==0)
+        ext_flags[1] = ext_flags[2] =  false;
+      else if (strcmp(arg[iarg+1],"y")==0)
+        ext_flags[0] = ext_flags[2] =  false;
+      else if (strcmp(arg[iarg+1],"z")==0)
+        ext_flags[0] = ext_flags[1] =  false;
+      else if (strcmp(arg[iarg+1],"xy")==0)
+        ext_flags[2] = false;
+      else if (strcmp(arg[iarg+1],"xz")==0)
+        ext_flags[1] = false;
+      else if (strcmp(arg[iarg+1],"yz")==0)
+        ext_flags[0] = false;
+      else if (strcmp(arg[iarg+1],"xyz")!=0)
+        error->all(FLERR,"Illegal fix nvt/npt/uef command");
+
+      iarg += 2;
+    } else {
+
+      // skip to next argument; argument check for unknown keywords is done in FixNH
+
+      ++iarg;
+    }
+  }
+
+  if (!erate_flag)
+    error->all(FLERR,"Keyword erate must be set for fix nvt/npt/uef command");
+
+  if (mtchain_default_flag) mtchain=1;
+
+  if (!domain->triclinic)
+    error->all(FLERR,"Simulation box must be triclinic for fix/nvt/npt/uef");
+
+  // check for conditions that impose a deviatoric stress
+
+  if (pstyle == TRICLINIC)
+    error->all(FLERR,"Only normal stresses can be controlled with fix/nvt/npt/uef");
+  double erate_tmp[3];
+  erate_tmp[0]=erate[0];
+  erate_tmp[1]=erate[1];
+  erate_tmp[2]=-erate[0]-erate[1];
+
+  if (pstyle == ANISO) {
+    if (!(ext_flags[0] && ext_flags[1] && ext_flags[2]))
+      error->all(FLERR,"The ext keyword may only be used with iso pressure control");
+    for (int k=0;k<3;k++)
+      for (int j=0;j<3;j++)
+        if (p_flag[k] && p_flag[j]) {
+          double tol = 1e-6;
+          if ( !nearly_equal(p_start[k],p_start[j],tol)
+               || !nearly_equal(p_stop[k],p_stop[j],tol))
+            error->all(FLERR,"All controlled stresses must have the same "
+                       "value in fix/nvt/npt/uef");
+          // strain rates have no start/stop pair, so one comparison suffices
+          if ( !nearly_equal(erate_tmp[k],erate_tmp[j],tol))
+            error->all(FLERR,"Dimensions with controlled stresses must have"\
+                       " same strain rate in fix/nvt/npt/uef");
+        }
+  }
+
+  // conditions that produce a deviatoric stress have already been eliminated.
+
+  deviatoric_flag=0;
+
+  // need pre_exchange and irregular migration
+
+  pre_exchange_flag = 1;
+  irregular = new Irregular(lmp);
+
+  // flag that I change the box here (in case of nvt)
+
+  box_change_shape = 1;
+
+  // initialize the UEFBox class which computes the box at each step
+
+  uefbox = new UEF_utils::UEFBox();
+  uefbox->set_strain(strain[0],strain[1]);
+
+  // reset fixedpoint to the stagnation point. I don't allow fixedpoint
+  // to be set by the user.
+
+  fixedpoint[0] = domain->boxlo[0];
+  fixedpoint[1] = domain->boxlo[1];
+  fixedpoint[2] = domain->boxlo[2];
+
+  // Create temp and pressure computes for nh/uef
+
+  int n = strlen(id) + 6;
+  id_temp = new char[n];
+  strcpy(id_temp,id);
+  strcat(id_temp,"_temp");
+  char **newarg = new char*[3];
+  newarg[0] = id_temp;
+  newarg[1] = (char *) "all";
+  newarg[2] = (char *) "temp/uef";
+  modify->add_compute(3,newarg);
+  delete [] newarg;
+  tcomputeflag = 1;
+
+  n = strlen(id) + 7;
+  id_press = new char[n];
+  strcpy(id_press,id);
+  strcat(id_press,"_press");
+  newarg = new char*[4];
+  newarg[0] = id_press;
+  newarg[1] = (char *) "all";
+  newarg[2] = (char *) "pressure/uef";
+  newarg[3] = id_temp;
+  modify->add_compute(4,newarg);
+  delete [] newarg;
+  pcomputeflag = 1;
+
+  nevery = 1;
+}
+
+/* ----------------------------------------------------------------------
+ * Free the UEFBox object and, when no barostat is active (nvt), remove
+ * the pressure compute created in the constructor along with its ID.
+ * ---------------------------------------------------------------------- */
+FixNHUef::~FixNHUef()
+{
+  delete uefbox;
+
+  // with pstat_flag set, id_press is left untouched here
+  if (pstat_flag || !pcomputeflag) return;
+  modify->delete_compute(id_press);
+  delete [] id_press;
+}
+
+/* ----------------------------------------------------------------------
+ * FixNH mask plus END_OF_STEP, so that end_of_step() is callable
+ * ---------------------------------------------------------------------- */
+int FixNHUef::setmask()
+{
+  // start from the base-class mask and add the extra bit
+  const int base = FixNH::setmask();
+  return base | END_OF_STEP;
+}
+
+/* ----------------------------------------------------------------------
+ * Run FixNH::init() and do more error checking. Set the pressure
+ * pointer in the case that the nvt version is used
+ * ---------------------------------------------------------------------- */
+void FixNHUef::init()
+{
+  FixNH::init();
+
+  // find conflict with fix/deform or other box changing fixes
+  for (int i=0; i < modify->nfix; i++)
+  {
+    if (strcmp(modify->fix[i]->id,id) != 0)
+      if (modify->fix[i]->box_change_shape != 0)
+        error->all(FLERR,"Can't use another fix which changes box shape with fix/nvt/npt/uef");
+  }
+
+  // without a barostat (nvt) look up the pressure compute here
+  if (!pstat_flag && pcomputeflag) {
+    int icomp = modify->find_compute(id_press);
+    if (icomp<0)
+      error->all(FLERR,"Pressure ID for fix/nvt/uef doesn't exist");
+    pressure = modify->compute[icomp];
+  }
+
+  // setup() and final_integrate() cast both computes to their uef
+  // classes, so check the styles for npt as well as nvt
+  // NOTE(review): assumes FixNH::init() sets `pressure` when pstat_flag is on
+  if (pstat_flag || pcomputeflag)
+    if (strcmp(pressure->style,"pressure/uef") != 0)
+      error->all(FLERR,"Using fix nvt/npt/uef without a compute pressure/uef");
+  if (strcmp(temperature->style,"temp/uef") != 0)
+    error->all(FLERR,"Using fix nvt/npt/uef without a compute temp/uef");
+}
+
+/* ----------------------------------------------------------------------
+ * Check the domain box against the UEFBox, set the rotation matrix for
+ * the first step, then run FixNH::setup()
+ * ---------------------------------------------------------------------- */
+void FixNHUef::setup(int j)
+{
+  // box produced by UEFBox for the current volume
+  double box[3][3];
+  const double vol = domain->xprd * domain->yprd * domain->zprd;
+  uefbox->get_box(box,vol);
+  // all edge lengths and tilt factors of the domain must agree with it
+  const double tol = 1e-4;
+  if (!(nearly_equal(domain->h[0],box[0][0],tol)
+        && nearly_equal(domain->h[1],box[1][1],tol)
+        && nearly_equal(domain->h[2],box[2][2],tol)
+        && nearly_equal(domain->xy,box[0][1],tol)
+        && nearly_equal(domain->yz,box[1][2],tol)
+        && nearly_equal(domain->xz,box[0][2],tol)))
+    error->all(FLERR,"Initial box is not close enough to the expected uef box");
+  // refresh rot and prime the uef computes before the base-class setup
+  uefbox->get_rot(rot);
+  ((ComputeTempUef*) temperature)->yes_rot();
+  ComputePressureUef *puef = (ComputePressureUef*) pressure;
+  puef->in_fix = true;
+  puef->update_rot();
+  FixNH::setup(j);
+}
+
+/* ----------------------------------------------------------------------
+ * inverse rotation -> FixNH initial integration step -> rotation
+ * ---------------------------------------------------------------------- */
+void FixNHUef::initial_integrate(int vflag)
+{
+  ComputeTempUef *tuef = (ComputeTempUef*) temperature;
+
+  // x, v and f are un-rotated and the temperature rotation is switched off
+  inv_rotate_x(rot); inv_rotate_v(rot); inv_rotate_f(rot);
+  tuef->no_rot();
+  FixNH::initial_integrate(vflag);
+  // `rot` is a member and is re-read here, after the base-class step
+  rotate_x(rot); rotate_v(rot); rotate_f(rot);
+  tuef->yes_rot();
+}
+
+/* ----------------------------------------------------------------------
+ * inverse rotation -> FixNH initial integration step (RESPA) -> rotation
+ * ---------------------------------------------------------------------- */
+void FixNHUef::initial_integrate_respa(int vflag, int ilevel, int iloop)
+{
+  ComputeTempUef *tuef = (ComputeTempUef*) temperature;
+
+  // x, v and f are un-rotated and the temperature rotation is switched off
+  inv_rotate_x(rot); inv_rotate_v(rot); inv_rotate_f(rot);
+  tuef->no_rot();
+  FixNH::initial_integrate_respa(vflag,ilevel,iloop);
+  // `rot` is a member and is re-read here, after the base-class step
+  rotate_x(rot); rotate_v(rot); rotate_f(rot);
+  tuef->yes_rot();
+}
+
+/* ----------------------------------------------------------------------
+ * inverse rotation of v,f -> FixNH final integration step -> rotation
+ * ---------------------------------------------------------------------- */
+void FixNHUef::final_integrate()
+{
+  // update rot here since it must directly follow the virial calculation
+  ((ComputePressureUef*) pressure)->update_rot();
+
+  ComputeTempUef *tuef = (ComputeTempUef*) temperature;
+  inv_rotate_v(rot); inv_rotate_f(rot);
+  tuef->no_rot();
+  FixNH::final_integrate();
+  rotate_v(rot); rotate_f(rot);
+  tuef->yes_rot();
+}
+
+/* ----------------------------------------------------------------------
+ * at outer level: call this->final_integrate()
+ * at other levels: inverse rotation -> nve_v() -> rotation
+ * ---------------------------------------------------------------------- */
+void FixNHUef::final_integrate_respa(int ilevel, int iloop)
+{
+  // set timesteps by level
+  dtf = 0.5 * step_respa[ilevel] * force->ftm2v;
+  dthalf = 0.5 * step_respa[ilevel];
+  // outermost level - update eta_dot and omega_dot, apply via final_integrate
+  if (ilevel == nlevels_respa-1) {
+    final_integrate();
+    return;
+  }
+  // all other levels - NVE update of v between the two rotations
+  inv_rotate_v(rot);
+  inv_rotate_f(rot);
+  nve_v();
+  rotate_v(rot);
+  rotate_f(rot);
+}
+
+/* ----------------------------------------------------------------------
+   SLLOD velocity update in time-reversible (i think) increments
+   v -> exp(-edot*dt/2)*v
+   v -> v +f/m*dt
+   v -> exp(-edot*dt/2)*v
+-----------------------------------------------------------------------*/
+void FixNHUef::nve_v()
+{
+  // "dt" in the header above is the half step: the force kick uses dtf,
+  // and each of the two flow scalings covers half of dthalf
+
+  double **v = atom->v;
+  double **f = atom->f;
+  double *rmass = atom->rmass;
+  double *mass = atom->mass;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+
+  // The flow term integrates the strain rate over time, so it has to use
+  // the plain time dthalf. dtf additionally carries force->ftm2v (see
+  // final_integrate_respa) for the f/m kick; using dtf here would scale
+  // the strain rate by ftm2v whenever ftm2v != 1. nve_x() likewise uses
+  // the plain time dtv for its flow term.
+  // NOTE(review): assumes FixNH keeps dthalf up to date outside of RESPA
+  // as well - confirm against FixNH::init() and FixNH::reset_dt().
+  const double ex = erate[0]*dthalf/2;
+  const double ey = erate[1]*dthalf/2;
+  const double ez = -ex-ey;
+  const double e0 = exp(-ex);
+  const double e1 = exp(-ey);
+  const double e2 = exp(-ez);
+
+  for (int i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+
+    // per-atom mass if rmass is set, per-type mass otherwise
+    const double dtfm = dtf / (rmass ? rmass[i] : mass[type[i]]);
+
+    // first flow scaling
+    v[i][0] *= e0;
+    v[i][1] *= e1;
+    v[i][2] *= e2;
+
+    // force kick
+    v[i][0] += dtfm*f[i][0];
+    v[i][1] += dtfm*f[i][1];
+    v[i][2] += dtfm*f[i][2];
+
+    // second flow scaling
+    v[i][0] *= e0;
+    v[i][1] *= e1;
+    v[i][2] *= e2;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Don't actually move atoms in remap(), just change the box
+-----------------------------------------------------------------------*/
+void FixNHUef::remap()
+{
+  double vol = domain->xprd * domain->yprd * domain->zprd;
+
+  // mean of the three omega_dot components, integrated over dto
+  const double dmean = dto*(omega_dot[0]+omega_dot[1]+omega_dot[2])/3.;
+
+  // constant volume strain associated with barostat: reshape the box
+  const double dex = dto*omega_dot[0]-dmean;
+  const double dey = dto*omega_dot[1]-dmean;
+  uefbox->step_deform(dex,dey);
+  strain[0] += dex;
+  strain[1] += dey;
+
+  // volume change, then copy the new cell into the domain
+  vol = vol*exp(3*dmean);
+  double cell[3][3];
+  uefbox->get_box(cell,vol);
+  for (int d = 0; d < 3; ++d)
+    domain->boxhi[d] = domain->boxlo[d]+cell[d][d];
+  domain->xy = cell[0][1];
+  domain->xz = cell[0][2];
+  domain->yz = cell[1][2];
+  domain->set_global_box();
+  domain->set_local_box();
+  uefbox->get_rot(rot);
+}
+
+/* ----------------------------------------------------------------------
+   SLLOD position update in time-reversible (i think) increments
+   x -> exp(edot*dt/2)*x
+   x -> x + v*dt
+   x -> exp(edot*dt/2)*x
+-----------------------------------------------------------------------*/
+void FixNHUef::nve_x()
+{
+  double **x = atom->x;
+  double **v = atom->v;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+  // strain applied by the flow during this step; z follows from x and y
+  const double dex = erate[0]*dtv;
+  const double dey = erate[1]*dtv;
+  const double dez = -dex -dey;
+  strain[0] += dex;
+  strain[1] += dey;
+  // half-step scale factors: flow plus the omega_dot contribution
+  const double s0 = exp((dex+omega_dot[0]*dtv)/2);
+  const double s1 = exp((dey+omega_dot[1]*dtv)/2.);
+  const double s2 = exp((dez+omega_dot[2]*dtv)/2.);
+  // scale, drift by v*dtv, scale again - only for atoms in group
+  for (int i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    x[i][0] *= s0;
+    x[i][1] *= s1;
+    x[i][2] *= s2;
+    x[i][0] += dtv * v[i][0];
+    x[i][1] += dtv * v[i][1];
+    x[i][2] += dtv * v[i][2];
+    x[i][0] *= s0;
+    x[i][1] *= s1;
+    x[i][2] *= s2;
+  }
+  // advance the UEFBox by the same strain and copy its cell to the domain
+  uefbox->step_deform(dex,dey);
+  double cell[3][3];
+  const double vol = domain->xprd * domain->yprd * domain->zprd;
+  uefbox->get_box(cell,vol);
+  for (int d = 0; d < 3; ++d)
+    domain->boxhi[d] = domain->boxlo[d]+cell[d][d];
+  domain->xy = cell[0][1];
+  domain->xz = cell[0][2];
+  domain->yz = cell[1][2];
+  domain->set_global_box();
+  domain->set_local_box();
+  uefbox->get_rot(rot);
+}
+
+/* ----------------------------------------------------------------------
+ * Do the lattice reduction if necessary.
+-----------------------------------------------------------------------*/
+void FixNHUef::pre_exchange()
+{
+  // only need to reset things if the lattice needs to be reduced
+  if (!uefbox->reduce()) return;
+  // go to lab frame
+  inv_rotate_x(rot);
+  inv_rotate_v(rot);
+  inv_rotate_f(rot);
+
+  // get & set the new box and rotation matrix
+  double cell[3][3];
+  const double vol = domain->xprd * domain->yprd * domain->zprd;
+  uefbox->get_box(cell,vol);
+  for (int d = 0; d < 3; ++d)
+    domain->boxhi[d] = domain->boxlo[d]+cell[d][d];
+  domain->xy = cell[0][1];
+  domain->xz = cell[0][2];
+  domain->yz = cell[1][2];
+  domain->set_global_box();
+  domain->set_local_box();
+  uefbox->get_rot(rot);
+
+  // rotate to the new upper triangular frame
+  rotate_v(rot);
+  rotate_x(rot);
+  rotate_f(rot);
+
+  // put all atoms in the new box
+  double **x = atom->x;
+  imageint *image = atom->image;
+  const int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++)
+    domain->remap(x[i],image[i]);
+
+  // move atoms to the right processors
+  // (atom->nlocal is re-read after the migration)
+  domain->x2lamda(atom->nlocal);
+  irregular->migrate_atoms();
+  domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+ * The following are routines to rotate between the lab and upper triangular
+ * (UT) frames. For most of the time the simulation is in the UT frame.
+ * To get to the lab frame, apply the inv_rotate_[..](rot) and to
+ * get back to the UT frame apply rotate_[..](rot).
+ *
+ * Note: the rotate_x() functions also apply a shift to/from the fixedpoint
+ * to make the integration a little simpler.
+ * ---------------------------------------------------------------------- */
+// lab frame -> UT frame for positions: x <- r.x + boxlo
+// (adding boxlo is the fixedpoint shift mentioned in the note above)
+void FixNHUef::rotate_x(double r[3][3])
+{
+  double **x = atom->x;
+  const int *mask = atom->mask;
+  const double *lo = domain->boxlo;
+
+  // loop bound is atom->nfirst when igroup is atom->firstgroup
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *xi = x[i];
+    // keep the old coordinates, every new component needs all three
+    const double old[3] = {xi[0],xi[1],xi[2]};
+    for (int k = 0; k < 3; k++)
+      xi[k] = r[k][0]*old[0]+r[k][1]*old[1]+r[k][2]*old[2]+lo[k];
+  }
+}
+
+// UT frame -> lab frame for positions: x <- r^t.(x - boxlo),
+// i.e. new[k] = sum_j r[j][k]*(old[j] - boxlo[j])
+void FixNHUef::inv_rotate_x(double r[3][3])
+{
+  double **x = atom->x;
+  const int *mask = atom->mask;
+  const double *lo = domain->boxlo;
+
+  // loop bound is atom->nfirst when igroup is atom->firstgroup,
+  // atom->nlocal otherwise
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  // atoms outside the fix group are left untouched
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *xi = x[i];
+
+    // undo the shift first, then apply the transposed matrix
+    const double rel[3] = {xi[0]-lo[0],xi[1]-lo[1],xi[2]-lo[2]};
+    for (int k = 0; k < 3; k++)
+      xi[k] = r[0][k]*rel[0]+r[1][k]*rel[1]+r[2][k]*rel[2];
+  }
+}
+
+// lab frame -> UT frame for velocities: v <- r.v
+void FixNHUef::rotate_v(double r[3][3])
+{
+  double **v = atom->v;
+  const int *mask = atom->mask;
+
+  // loop bound is atom->nfirst when igroup is atom->firstgroup
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *vi = v[i];
+    // keep the old components, every new one needs all three
+    const double old[3] = {vi[0],vi[1],vi[2]};
+    for (int k = 0; k < 3; k++)
+      vi[k] = r[k][0]*old[0]+r[k][1]*old[1]+r[k][2]*old[2];
+  }
+}
+
+// UT frame -> lab frame for velocities: v <- r^t.v
+void FixNHUef::inv_rotate_v(double r[3][3])
+{
+  double **v = atom->v;
+  const int *mask = atom->mask;
+
+  // loop bound is atom->nfirst when igroup is atom->firstgroup
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *vi = v[i];
+    // keep the old components, every new one needs all three
+    const double old[3] = {vi[0],vi[1],vi[2]};
+    for (int k = 0; k < 3; k++)
+      vi[k] = r[0][k]*old[0]+r[1][k]*old[1]+r[2][k]*old[2];
+  }
+}
+
+// lab frame -> UT frame for forces: f <- r.f
+void FixNHUef::rotate_f(double r[3][3])
+{
+  double **f = atom->f;
+  const int *mask = atom->mask;
+
+  // loop bound is atom->nfirst when igroup is atom->firstgroup
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *fi = f[i];
+    // keep the old components, every new one needs all three
+    const double old[3] = {fi[0],fi[1],fi[2]};
+    for (int k = 0; k < 3; k++)
+      fi[k] = r[k][0]*old[0]+r[k][1]*old[1]+r[k][2]*old[2];
+  }
+}
+
+// UT frame -> lab frame for forces: f <- r^t.f
+void FixNHUef::inv_rotate_f(double r[3][3])
+{
+  double **f = atom->f;
+  const int *mask = atom->mask;
+  // loop bound is atom->nfirst when igroup is atom->firstgroup
+  int nmove = atom->nlocal;
+  if (igroup == atom->firstgroup) nmove = atom->nfirst;
+
+  for (int i = 0; i < nmove; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    double *fi = f[i];
+    // keep the old components, every new one needs all three
+    const double old[3] = {fi[0],fi[1],fi[2]};
+    for (int k = 0; k < 3; k++)
+      fi[k] = r[0][k]*old[0]+r[1][k]*old[1]+r[2][k]*old[2];
+  }
+}
+
+/* ----------------------------------------------------------------------
+ * Size of the FixNH restart list plus two slots for the strains
+ * ---------------------------------------------------------------------- */
+int FixNHUef::size_restart_global()
+{
+  return 2 + FixNH::size_restart_global();
+}
+
+/* ----------------------------------------------------------------------
+ * FixNH packs first; the two strains follow its entries in the list
+ * ---------------------------------------------------------------------- */
+int FixNHUef::pack_restart_data(double *list)
+{
+  int used = FixNH::pack_restart_data(list);
+  list[used] = strain[0];
+  list[used+1] = strain[1];
+  return used+2;
+}
+
+/* ----------------------------------------------------------------------
+ * FixNH reads its values; the strains are the last two list entries
+ * ---------------------------------------------------------------------- */
+void FixNHUef::restart(char *buf)
+{
+  const int nlist = size_restart_global();
+  FixNH::restart(buf);
+  const double *list = (double *) buf;
+  strain[0] = list[nlist-2];
+  strain[1] = list[nlist-1];
+  uefbox->set_strain(strain[0],strain[1]);
+}
+
+/* ----------------------------------------------------------------------
+ * If the step writes a restart, reduce the box beforehand. This makes sure
+ * the unique box shape can be found once the restart is read and that
+ * all of the atoms lie within the box.
+ * This may only be necessary for RESPA runs, but I'm leaving it in anyway.
+ * ---------------------------------------------------------------------- */
+void FixNHUef::end_of_step()
+{
+  // nothing to do unless a restart is written on this step
+  if (update->ntimestep != output->next_restart) return;
+  pre_exchange();
+  // same sequence as in post_run()
+  domain->x2lamda(atom->nlocal);
+  domain->pbc();
+  timer->stamp();
+  comm->exchange();
+  comm->borders();
+  domain->lamda2x(atom->nlocal+atom->nghost);
+  timer->stamp(Timer::COMM);
+  neighbor->build();
+  timer->stamp(Timer::NEIGH);
+}
+
+/* ----------------------------------------------------------------------
+ * reduce the simulation box after a run is complete. otherwise it won't
+ * be possible to resume from a write_restart since the initialization of
+ * the simulation box requires reduced simulation box
+ * ---------------------------------------------------------------------- */
+void FixNHUef::post_run()
+{
+  pre_exchange();                  // reduces the box only if uefbox->reduce() says so
+  domain->x2lamda(atom->nlocal);   // local atoms only
+  domain->pbc();
+  timer->stamp();
+  comm->exchange();
+  comm->borders();
+  domain->lamda2x(atom->nlocal+atom->nghost);  // converts ghosts back as well
+  timer->stamp(Timer::COMM);
+  neighbor->build();
+  timer->stamp(Timer::NEIGH);
+}
+
+/* ----------------------------------------------------------------------
+ * public read for rotation matrix
+ *
+ * copies the member rot into the caller-supplied 3x3 array r; rot itself
+ * is refreshed from the UEFBox in setup(), remap(), nve_x() and
+ * pre_exchange(), and is passed to the rotate_ and inv_rotate_ functions
+ * that switch between the lab frame and the upper triangular frame
+ * ---------------------------------------------------------------------- */
+void FixNHUef::get_rot(double r[3][3])
+{
+  // element-wise copy, the member itself is not exposed
+  for (int i = 0; i < 3; ++i)
+    for (int j = 0; j < 3; ++j)
+      r[i][j] = rot[i][j];
+}
+
+/* ----------------------------------------------------------------------
+ * public read for ext flags
+ * ---------------------------------------------------------------------- */
+void FixNHUef::get_ext_flags(bool* e)
+{
+  // e must have room for the three flags (x, y, z)
+  for (int d = 0; d < 3; ++d)
+    e[d] = ext_flags[d];
+}
+
+/* ----------------------------------------------------------------------
+ * public read for simulation box
+ *
+ * fills the caller-supplied 3x3 array b with the cell that the UEFBox
+ * returns for the volume xprd*yprd*zprd of the domain
+ * ---------------------------------------------------------------------- */
+void FixNHUef::get_box(double b[3][3])
+{
+  // cell that the UEFBox yields for the current domain volume
+  double cell[3][3];
+  const double vol = domain->xprd * domain->yprd * domain->zprd;
+  uefbox->get_box(cell,vol);
+
+  // hand a copy to the caller
+  for (int i = 0; i < 3; ++i)
+    for (int j = 0; j < 3; ++j)
+      b[i][j] = cell[i][j];
+}
+
+/* ----------------------------------------------------------------------
+ * approximate comparison of two floats with tolerance epsilon
+ * it's imperfect, but should work provided no infinities
+ * ---------------------------------------------------------------------- */
+bool FixNHUef::nearly_equal(double a, double b, double epsilon)
+{
+  if (a == b) return true;
+  const double scale = fabs(a) + fabs(b);
+  const double gap = fabs(a-b);
+
+  // |a|+|b| below epsilon: absolute test against epsilon^2
+  if (scale < epsilon) return gap < epsilon*epsilon;
+  // otherwise: difference relative to |a|+|b|
+  return gap/scale < epsilon;
+}
diff --git a/src/USER-UEF/fix_nh_uef.h b/src/USER-UEF/fix_nh_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..43f5bb46a9f2c97c6f3a5f784506d3c9839aed56
--- /dev/null
+++ b/src/USER-UEF/fix_nh_uef.h
@@ -0,0 +1,128 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   www.cs.sandia.gov/~sjplimp/lammps.html
+   Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing Author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+
+#ifndef LMP_FIX_NH_UEF_H
+#define LMP_FIX_NH_UEF_H
+
+#include "fix_nh.h"
+
+namespace LAMMPS_NS {
+  // forward declaration (only a pointer to UEFBox is stored in the class below)
+  namespace UEF_utils {
+    class UEFBox;
+  };
+
+class FixNHUef : public FixNH {   // common parent of FixNVTUef and FixNPTUef
+ public:
+  FixNHUef(class LAMMPS *, int, char **);
+  virtual ~FixNHUef();
+  virtual int setmask();
+  virtual void init();
+  virtual void setup(int);
+  virtual void pre_exchange();
+  virtual int pack_restart_data(double*);
+  virtual void restart(char *);
+  virtual void end_of_step();
+  virtual void initial_integrate(int);
+  virtual void final_integrate();
+  virtual void initial_integrate_respa(int, int, int);
+  virtual void final_integrate_respa(int, int);
+  virtual void post_run();
+  void get_rot(double[3][3]);      // copies rot into the given 3x3 array
+  void get_ext_flags(bool*);       // copies ext_flags[0..2] into the given array
+  void get_box(double[3][3]);      // fills the array via uefbox->get_box()
+
+ protected:
+  virtual void remap();
+  virtual int size_restart_global();
+  virtual void nve_x();
+  virtual void nve_v();
+  void rotate_x(double [3][3]);
+  void inv_rotate_x(double[3][3]);
+  void rotate_v(double[3][3]);
+  void inv_rotate_v(double[3][3]);
+  void rotate_f(double[3][3]);
+  void inv_rotate_f(double[3][3]);
+  double strain[2],erate[2]; // strain/strain rate : [e_x, e_y]
+                             // always assume traceless e_z = -e_x-e_y
+
+  int rem;                   //this is for the narg kluge
+
+  UEF_utils::UEFBox *uefbox;      // interface for the special simulation box
+
+  double rot[3][3];          // rotation matrix
+  bool ext_flags[3];         // flags for external "free surfaces"
+  bool nearly_equal(double,double,double);  // approximate comparison of two doubles
+  //bool rotate_output;      // experimental feature. Too many issues for now
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+This class is derived from FixNH, so it inherits most of FixNH's error/warning messages in addition to the following:
+
+E: Illegal fix nvt/npt/uef command
+
+Self-explanatory
+
+E: Keyword erate must be set for fix nvt/npt/uef command
+
+Self-explanatory.
+
+E: Simulation box must be triclinic for fix/nvt/npt/uef
+
+Self-explanatory.
+
+E: Only normal stresses can be controlled with fix/nvt/npt/uef
+
+The keywords xy xz and yz cannot be used for pressure control
+
+E: The ext keyword may only be used with iso pressure control
+
+Self-explanatory
+
+E: All controlled stresses must have the same value in fix/nvt/npt/uef
+
+Stress control is only possible when the stress specified for each dimension is the same
+
+E: Dimensions with controlled stresses must have same strain rate in fix/nvt/npt/uef
+
+Stress-controlled dimensions with the same strain rate must have the same target stress.
+
+E: Can't use another fix which changes box shape with fix/nvt/npt/uef
+
+The fix npt/nvt/uef command must have full control over the box shape. You cannot use a simultaneous fix deform command, for example.
+
+E: Pressure ID for fix/nvt/uef doesn't exist
+
+The compute pressure introduced via fix_modify does not exist
+
+E: Using fix nvt/npt/uef without a compute pressure/uef
+
+Self-explanatory.
+
+E: Using fix nvt/npt/uef without a compute temp/uef
+
+Self-explanatory.
+
+E: Initial box is not close enough to the expected uef box
+
+The initial box does not correspond to the shape required by the value of the strain keyword. If the default strain value of zero was used, the initial box is not cubic.
+
+*/
diff --git a/src/USER-UEF/fix_npt_uef.cpp b/src/USER-UEF/fix_npt_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b8ab1eccf92c6b044e9d8274d2ebf1d6eece8f38
--- /dev/null
+++ b/src/USER-UEF/fix_npt_uef.cpp
@@ -0,0 +1,28 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#include "fix_npt_uef.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+FixNPTUef::FixNPTUef(LAMMPS *lmp, int narg, char **arg)
+  : FixNHUef(lmp, narg, arg) {
+  // this style needs both temperature control and pressure control
+  if (!tstat_flag) error->all(FLERR,
+      "Temperature control must be used with fix npt/uef");
+  if (!pstat_flag) error->all(FLERR,
+      "Pressure control must be used with fix npt/uef");
+}
diff --git a/src/USER-UEF/fix_npt_uef.h b/src/USER-UEF/fix_npt_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c0cd28ae02236e11d678c6fd5cd076dcbf7febc
--- /dev/null
+++ b/src/USER-UEF/fix_npt_uef.h
@@ -0,0 +1,50 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(npt/uef,FixNPTUef)
+
+#else
+
+#ifndef LMP_FIX_NPT_UEF_H
+#define LMP_FIX_NPT_UEF_H
+
+#include "fix_nh_uef.h"
+
+namespace LAMMPS_NS {
+
+class FixNPTUef : public FixNHUef {          // registered above as fix style npt/uef
+ public:
+  FixNPTUef(class LAMMPS *, int, char **);   // errors unless tstat_flag and pstat_flag are set
+  ~FixNPTUef() {}                            // empty: nothing is released here
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Temperature control must be used with fix npt/uef
+
+Self-explanatory.
+
+E: Pressure control must be used with fix npt/uef
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-UEF/fix_nvt_uef.cpp b/src/USER-UEF/fix_nvt_uef.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2fd4a2854571969fb49c19d64c77c0ef901787d1
--- /dev/null
+++ b/src/USER-UEF/fix_nvt_uef.cpp
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#include "fix_nvt_uef.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+FixNVTUef::FixNVTUef(LAMMPS *lmp, int narg, char **arg)
+  : FixNHUef(lmp, narg, arg) {
+  // this style needs temperature control and rejects pressure control
+  if (!tstat_flag) error->all(FLERR,
+      "Temperature control must be used with fix nvt/uef");
+  if (pstat_flag) error->all(FLERR,
+      "Pressure control can't be used with fix nvt/uef");
+}
+
+
diff --git a/src/USER-UEF/fix_nvt_uef.h b/src/USER-UEF/fix_nvt_uef.h
new file mode 100644
index 0000000000000000000000000000000000000000..718e36e756780086884c4c451a5f5faa67c29573
--- /dev/null
+++ b/src/USER-UEF/fix_nvt_uef.h
@@ -0,0 +1,50 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(nvt/uef,FixNVTUef)
+
+#else
+
+#ifndef LMP_FIX_NVT_UEF_H
+#define LMP_FIX_NVT_UEF_H
+
+#include "fix_nh_uef.h"
+
+namespace LAMMPS_NS {
+
+class FixNVTUef : public FixNHUef {          // registered above as fix style nvt/uef
+ public:
+  FixNVTUef(class LAMMPS *, int, char **);   // errors unless tstat_flag is set and pstat_flag is not
+  ~FixNVTUef(){}                             // empty: nothing is released here
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Temperature control must be used with fix nvt/uef
+
+Self-explanatory.
+
+E: Pressure control can't be used with fix nvt/uef
+
+Self-explanatory.
+
+*/
diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f270fe1d8b41760aa1236c1e4b478f79a24203ca
--- /dev/null
+++ b/src/USER-UEF/uef_utils.cpp
@@ -0,0 +1,366 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+-------------------------------------------------------------------------
+
+   This class contains functions to calculate the evolution of the periodic
+   simulation box under elongational flow as described by Matthew Dobson
+   in the arXiv preprint at http://arxiv.org/abs/1408.7078
+
+   Additionally, there are methods to do a lattice reduction to further
+   reduce the simulation box using the method of Igor Semaev at
+   http://link.springer.com/chapter/10.1007%2F3-540-44670-2_13
+*/
+
+#include <math.h>
+#include "uef_utils.h"
+
+namespace LAMMPS_NS {
+  namespace UEF_utils{
+
+UEFBox::UEFBox()   // fills l0, w1/w2, theta, l, r, rot/lrot, a1/a2 (+inverses), winv
+{
+  // initial box (also an inverse eigenvector matrix of automorphisms)
+  double x = 0.327985277605681;
+  double y = 0.591009048506103;
+  double z = 0.736976229099578;
+  l0[0][0]= z; l0[0][1]= y; l0[0][2]= x;
+  l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y;
+  l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z;
+  // spectra of the two automorphisms (log of eigenvalues)
+  w1[0]=-1.177725211523360;
+  w1[1]=-0.441448620566067;
+  w1[2]= 1.619173832089425;
+  w2[0]= w1[1];   // w2 is w1 cyclically shifted by one entry
+  w2[1]= w1[2];
+  w2[2]= w1[0];
+  // initialize theta
+  // strain = w1 * theta1 + w2 * theta2
+  theta[0]=theta[1]=0;
+
+
+  //set up the initial box l and change of basis matrix r
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+    {
+      l[k][j] = l0[k][j];
+      r[j][k]=(j==k);   // identity: 1 on the diagonal, 0 elsewhere
+    }
+
+  // get the initial rotation and upper triangular matrix
+  rotation_matrix(rot, lrot ,l);
+
+  // this is just a way to calculate the automorphisms
+  // themselves, which play a minor role in the calculations
+  // it's overkill, but only called once
+  double t1[3][3];
+  double t1i[3][3];
+  double t2[3][3];
+  double t2i[3][3];
+  double l0t[3][3];
+  for (int k=0; k<3; ++k)
+    for (int j=0; j<3; ++j)
+    {
+      t1[k][j] = exp(w1[k])*l0[k][j];     // row k of l0 scaled by exp(w1[k])
+      t1i[k][j] = exp(-w1[k])*l0[k][j];   // same with the inverse factor
+      t2[k][j] = exp(w2[k])*l0[k][j];
+      t2i[k][j] = exp(-w2[k])*l0[k][j];
+      l0t[k][j] = l0[j][k];               // transpose of l0
+    }
+  mul_m2(l0t,t1);    // mul_m2 overwrites its 2nd argument: t1 = l0t * t1
+  mul_m2(l0t,t1i);
+  mul_m2(l0t,t2);
+  mul_m2(l0t,t2i);
+  for (int k=0; k<3; ++k)
+    for (int j=0; j<3; ++j)
+    {
+      a1[k][j] = round(t1[k][j]);   // stored as int, so round to nearest integer
+      a1i[k][j] = round(t1i[k][j]);
+      a2[k][j] = round(t2[k][j]);
+      a2i[k][j] = round(t2i[k][j]);
+    }
+
+  // winv used to transform between
+  // strain increments and theta increments
+  winv[0][0] = w2[1];    // adjugate of the 2x2 matrix [[w1[0],w2[0]],[w1[1],w2[1]]]
+  winv[0][1] = -w2[0];
+  winv[1][0] = -w1[1];
+  winv[1][1] = w1[0];
+  double d = w1[0]*w2[1] - w2[0]*w1[1];   // its determinant
+  for (int k=0;k<2;k++)
+    for (int j=0;j<2;j++)
+      winv[k][j] /= d;   // adjugate/determinant = inverse; only the 2x2 corner is set
+}
+
+// get volume-correct r basis in: basis*cbrt(vol) = q*r  (x = lrot*cbrt(v))
+void UEFBox::get_box(double x[3][3], double v)
+{
+  v = cbrt(v);  // double-precision cube root; cbrtf() rounded the scale to float
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      x[k][j] = lrot[k][j]*v;
+}
+
+// copy the stored rotation matrix q (basis = q*r) into x
+void UEFBox::get_rot(double x[3][3])
+{
+  for (int row=0;row<3;row++) {
+    x[row][0]=rot[row][0]; x[row][1]=rot[row][1]; x[row][2]=rot[row][2];
+  }
+}
+
+// diagonal, incompressible deformation
+void UEFBox::step_deform(const double ex, const double ey)
+{
+  // accumulate the strain increment in the theta values used by reduce()
+  for (int i=0;i<2;i++)
+    theta[i] += winv[i][0]*ex + winv[i][1]*ey;
+
+  // strain per row; the third is -ex-ey, so the three stretch
+  // factors below multiply to 1
+  const double strain[3] = { ex, ey, -ex-ey };
+
+  // stretch each row of l. reduce() needs to be called regularly
+  // or the calculation will become unstable
+  for (int row=0;row<3;row++)
+  {
+    const double stretch = exp(strain[row]);
+    for (int col=0;col<3;col++)
+      l[row][col] *= stretch;
+  }
+  rotation_matrix(rot,lrot, l);
+}
+// reduce the current basis; returns true when the change of basis matrix r changed
+bool UEFBox::reduce()
+{
+  // determine how many times to apply the automorphisms
+  // and find new theta values
+  int f1 = round(theta[0]);
+  int f2 = round(theta[1]);
+  theta[0] -= f1;   // keep only the offset from the nearest integer
+  theta[1] -= f2;
+
+  // store old change of basis matrix to determine if it
+  // changes
+  int r0[3][3];
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      r0[k][j]=r[k][j];
+
+  // this modifies the old change basis matrix to
+  // handle the case where the automorphism transforms
+  // the box but the reduced basis doesn't change
+  // (r0 should still equal r at the end)
+  if (f1 > 0) for (int k=0;k<f1;k++) mul_m2 (a1,r0);    // r0 = a1 * r0, f1 times
+  if (f1 < 0) for (int k=0;k<-f1;k++) mul_m2 (a1i,r0);  // inverse automorphism, |f1| times
+  if (f2 > 0) for (int k=0;k<f2;k++) mul_m2 (a2,r0);
+  if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0);
+
+  // robust reduction to the box defined by Dobson
+  for (int k=0;k<3;k++)
+  {
+    double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);   // scale for row k, rebuilt from l0
+    l[k][0] = eps*l0[k][0];
+    l[k][1] = eps*l0[k][1];
+    l[k][2] = eps*l0[k][2];
+  }
+  // further reduce the box using greedy reduction and check
+  // if it changed from the last step using the change of basis
+  // matrices r and r0
+  greedy(l,r);                    // greedy() resets r to identity before reducing
+  rotation_matrix(rot,lrot, l);   // refresh rot and lrot from the new l
+  return !mat_same(r,r0);
+}
+// set theta from a total strain (ex,ey), wrap it, and rebuild the reduced box
+void UEFBox::set_strain(const double ex, const double ey)
+{
+  for (int i=0;i<2;i++)
+  {
+    const double t = winv[i][0]*ex + winv[i][1]*ey;
+    theta[i] = t - round(t);   // keep only the offset from the nearest integer
+  }
+  for (int row=0;row<3;row++)
+  {
+    const double scale = exp(theta[0]*w1[row]+theta[1]*w2[row]);
+    for (int col=0;col<3;col++)
+      l[row][col] = scale*l0[row][col];
+  }
+  greedy(l,r);
+  rotation_matrix(rot,lrot, l);
+}
+
+// qr reduction of the 3x3 matrix m using two householder reflections
+// m is input matrix, q is a rotation, r is upper triangular with q*m = r
+// pivot signs are taken with copysign(), so a pivot of exactly 0 gives no 0/0
+void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
+{
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      r[k][j] = m[k][j];
+
+  double a = -copysign(sqrt(col_prod(r,0,0)),r[0][0]);  // -sign(r00)*|column 0|
+  double v[3];
+  v[0] = r[0][0]-a;
+  v[1] = r[1][0];
+  v[2] = r[2][0];
+  a = sqrt(v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
+  v[0] /= a; v[1] /= a; v[2] /= a;   // unit householder vector
+  double qt[3][3];
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+    {
+      qt[k][j] = (k==j) - 2*v[k]*v[j];   // reflection I - 2*v*v^T
+      q[k][j]= qt[k][j];
+    }
+  mul_m2(qt,r);   // r = qt*r
+  a = -copysign(sqrt(r[1][1]*r[1][1] + r[2][1]*r[2][1]),r[1][1]);
+  v[0] = 0;       // second reflection leaves row 0 untouched
+  v[1] = r[1][1] - a;
+  v[2] = r[2][1];
+  a = sqrt(v[1]*v[1]+v[2]*v[2]);
+  v[1] /= a;
+  v[2] /= a;
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      qt[k][j] = (k==j) - 2*v[k]*v[j];
+  mul_m2(qt,r);   // apply the second reflection to r ...
+  mul_m2(qt,q);   // ... and accumulate it into q
+  // this makes r have positive diagonals
+  // q*m = r <==> (-q)*m = (-r) will hold row-wise
+  if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); }
+  if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); }
+  if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); }
+}
+
+
+
+//sort columns in order of increasing length
+void col_sort(double b[3][3],int r[3][3])
+{
+  // compare-and-swap on the column pairs (0,1), (0,2), (1,2);
+  // lengths are compared through the squared norm col_prod(b,c,c)
+  // and every swap of b is mirrored in r
+  static const int pair[3][2] = { {0,1}, {0,2}, {1,2} };
+
+  for (int n=0;n<3;n++)
+  {
+    const int lo = pair[n][0];
+    const int hi = pair[n][1];
+    if (col_prod(b,lo,lo)>col_prod(b,hi,hi))
+    {
+      col_swap(b,lo,hi);
+      col_swap(r,lo,hi);
+    }
+  }
+}
+
+
+// 1-2 reduction (Gram-Schmidt)
+void red12(double b[3][3],int r[3][3])
+{
+  for (;;)
+  {
+    // integer multiple of column 0 to remove from column 1
+    const int mult = round(col_prod(b,0,1)/col_prod(b,0,0));
+    for (int row=0;row<3;row++)
+    {
+      b[row][1] -= mult*b[row][0];
+      r[row][1] -= mult*r[row][0];
+    }
+    // finished once column 1 is no shorter than column 0
+    if (!(col_prod(b,1,1) < col_prod(b,0,0))) return;
+    col_swap(b,0,1);
+    col_swap(r,0,1);
+  }
+}
+
+// The Semaev condition for a 3-reduced basis
+void red3(double b[3][3], int r[3][3])
+{
+  double b11 = col_prod(b,0,0);   // column dot products; bij = column i . column j (1-based)
+  double b22 = col_prod(b,1,1);
+  double b12 = col_prod(b,0,1);
+  double b13 = col_prod(b,0,2);
+  double b23 = col_prod(b,1,2);
+
+  double y2 =-(b23/b22-b12/b22*b13/b11)/(1-b12/b11*b12/b22);   // real-valued (y1,y2) minimizing
+  double y1 =-(b13/b11-b12/b11*b23/b22)/(1-b12/b11*b12/b22);   // |col2 + y1*col0 + y2*col1|^2
+
+  int x1=0;                        // best integer coefficients found so far
+  int x2=0;
+  double min = col_prod(b,2,2);    // squared length of the unmodified column 2
+  int x1v[2];
+  int x2v[2];
+  x1v[0] = floor(y1); x1v[1] = x1v[0]+1;   // two integer candidates around y1
+  x2v[0] = floor(y2); x2v[1] = x2v[0]+1;   // two integer candidates around y2
+  for (int k=0;k<2;k++)
+    for (int j=0;j<2;j++)
+    {
+      double a[3];                 // trial column: col2 + x1v[k]*col0 + x2v[j]*col1
+      a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1];
+      a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1];
+      a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1];
+      double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2];
+      if (val<min)                 // strictly shorter than anything seen so far
+      {
+        min = val;
+        x1 = x1v[k];
+        x2 = x2v[j];
+      }
+    }
+  if (x1 || x2)                    // a shorter third column was found
+  {
+    b[0][2] += x1*b[0][0] + x2*b[0][1];
+    b[1][2] += x1*b[1][0] + x2*b[1][1];
+    b[2][2] += x1*b[2][0] + x2*b[2][1];
+    r[0][2] += x1*r[0][0] + x2*r[0][1];   // same column operation on r
+    r[1][2] += x1*r[1][0] + x2*r[1][1];
+    r[2][2] += x1*r[2][0] + x2*r[2][1];
+    greedy_recurse(b,r); // note the recursion step is here
+  }
+}
+
+// the meat of the greedy reduction algorithm (b = basis, r = change of basis)
+void greedy_recurse(double b[3][3], int r[3][3])
+{
+  col_sort(b,r);   // order columns by increasing length
+  red12(b,r);      // reduce column 1 against column 0
+  red3(b,r); // recursive caller: calls greedy_recurse again if column 2 changed
+}
+
+// reset r (change of basis) to identity, then reduce basis and make it unique
+void greedy(double b[3][3],int r[3][3])
+{
+  for (int row=0;row<3;row++)
+    for (int col=0;col<3;col++) r[row][col] = (row==col) ? 1 : 0;
+  greedy_recurse(b,r);
+  make_unique(b,r);
+}
+
+// A reduced basis isn't unique; this makes it "more" unique. Degenerate
+// cases are possible, but unlikely with floating point math.
+void make_unique(double b[3][3], int r[3][3])
+{
+  // column order: largest |entry| of row 0 first, then of row 1
+  if (fabs(b[0][1]) > fabs(b[0][0]))
+  { col_swap(b,0,1); col_swap(r,0,1); }
+  if (fabs(b[0][2]) > fabs(b[0][0]))
+  { col_swap(b,0,2); col_swap(r,0,2); }
+  if (fabs(b[1][2]) > fabs(b[1][1]))
+  { col_swap(b,1,2); col_swap(r,1,2); }
+  // signs: first two diagonal entries, then the determinant
+  for (int c=0;c<2;c++)
+    if (b[c][c] < 0){ neg_col(b,c); neg_col(r,c); }
+  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); }
+}
+}}
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..a16f6fff1a70f1e5e15b3f993efc702deaaf034f
--- /dev/null
+++ b/src/USER-UEF/uef_utils.h
@@ -0,0 +1,131 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+
+   Contributing author: David Nicholson (MIT)
+------------------------------------------------------------------------- */
+
+#ifndef LMP_UEF_UTILS_H
+#define LMP_UEF_UTILS_H
+
+namespace LAMMPS_NS{ namespace UEF_utils {
+
+class UEFBox
+{
+  public:
+    UEFBox();
+    void set_strain(const double, const double);   // set theta from a total strain and rebuild l
+    void step_deform(const double,const double);   // apply one strain increment to l
+    bool reduce();                                 // true when r differs from its propagated old value
+    void get_box(double[3][3], double);            // lrot scaled by the cube root of the 2nd argument
+    void get_rot(double[3][3]);                    // copy of rot
+  private:
+    double l0[3][3]; // initial basis
+    double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms); only winv[0..1][0..1] is set
+    //double edot[3], delta[2];
+    double theta[2];   // strain expressed in the (w1,w2) basis: strain = w1*theta[0] + w2*theta[1]
+    double l[3][3], rot[3][3], lrot[3][3];   // current basis, and the q / r outputs of rotation_matrix(rot,lrot,l)
+    int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];   // change of basis; automorphisms and their inverses
+};
+
+
+// lattice reduction routines (double matrix = basis, int matrix = change of basis)
+void greedy(double[3][3],int[3][3]);           // reset r to identity, reduce, make unique
+void col_sort(double[3][3],int[3][3]);         // order columns by increasing length
+void red12(double[3][3],int[3][3]);            // reduce column 1 against column 0
+void greedy_recurse(double[3][3],int[3][3]);   // col_sort, red12, red3 in that order
+void red3(double [3][3],int r[3][3]);          // shorten column 2; may call greedy_recurse
+void make_unique(double[3][3],int[3][3]);      // fix column order and signs
+void rotation_matrix(double[3][3],double[3][3],const double [3][3]);   // (q, r, m) with q*m = r
+
+// A few utility functions for 3x3 arrays
+template<typename T>   // dot product of columns c1 and c2 of the 3x3 array x
+T col_prod(T x[3][3], int c1, int c2)
+{
+  return x[0][c1]*x[0][c2]+x[1][c1]*x[1][c2]+x[2][c1]*x[2][c2];   // c1==c2 gives the squared length
+}
+
+// exchange columns c1 and c2 of x, one row at a time
+template<typename T>
+void col_swap(T x[3][3], int c1, int c2)
+{
+  for (int row=0;row<3;row++) {
+    const T held = x[row][c1];
+    x[row][c1] = x[row][c2];
+    x[row][c2] = held;
+  }
+}
+
+// flip the sign of every entry in column c1 of x
+template<typename T>
+void neg_col(T x[3][3], int c1)
+{
+  for (int row=0;row<3;row++)
+    x[row][c1] = -x[row][c1];
+}
+
+// flip the sign of every entry in row c1 of x
+template<typename T>
+void neg_row(T x[3][3], int c1)
+{
+  for (int col=0;col<3;col++)
+    x[c1][col] = -x[c1][col];
+}
+
+template<typename T>   // determinant of the 3x3 array x, expanded along row 0
+T det(T x[3][3])
+{
+  // accumulate in T (not double) so the result matches the element type
+  T val = x[0][0]*(x[1][1]*x[2][2] - x[1][2]*x[2][1]);
+  val -= x[0][1]*(x[1][0]*x[2][2] - x[1][2]*x[2][0]);
+  val += x[0][2]*(x[1][0]*x[2][1] - x[1][1]*x[2][0]);
+  return val;
+}
+
+// true when all nine entries of x1 and x2 compare equal with ==
+template<typename T>
+bool mat_same(T x1[3][3], T x2[3][3])
+{
+  for (int row=0;row<3;row++)
+    for (int col=0;col<3;col++)
+      if (!(x1[row][col]==x2[row][col])) return false;
+  return true;
+}
+
+// in-place right multiplication: m1 <- m1 * m2
+template<typename T>
+void mul_m1(T m1[3][3], const T m2[3][3])
+{
+  for (int row=0;row<3;row++)
+  {
+    // a product row depends only on the same row of m1: save it first
+    const T a = m1[row][0], b = m1[row][1], c = m1[row][2];
+    for (int col=0;col<3;col++)
+      m1[row][col] = a*m2[0][col] + b*m2[1][col] + c*m2[2][col];
+  }
+}
+
+// in-place left multiplication: m2 <- m1 * m2
+template<typename T>
+void mul_m2(const T m1[3][3], T m2[3][3])
+{
+  T old[3][3];   // snapshot of m2: outputs are built from the old values
+  for (int n=0;n<9;n++) old[n/3][n%3] = m2[n/3][n%3];
+  for (int n=0;n<9;n++) {
+    const int row = n/3, col = n%3;
+    m2[row][col] = m1[row][0]*old[0][col] + m1[row][1]*old[1][col]
+                 + m1[row][2]*old[2][col];
+  }
+}
+
+}
+}
+#endif
diff --git a/src/atom.cpp b/src/atom.cpp
index 1191f0f2b557ca769c07cb7971712b2ec33dba3b..7d343a0807798418edec5725ff50b6168a6658d4 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -453,12 +453,12 @@ void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix)
   // if molecular system:
   // atom IDs must be defined
   // force atom map to be created
-  // map style may be reset by map_init() and its call to map_style_set()
+  // map style will be reset to array or hash by map_init()
 
   molecular = avec->molecular;
   if (molecular && tag_enable == 0)
     error->all(FLERR,"Atom IDs must be used for molecular systems");
-  if (molecular) map_style = 1;
+  if (molecular) map_style = 3;
 }
 
 /* ----------------------------------------------------------------------
@@ -593,6 +593,7 @@ void Atom::modify_params(int narg, char **arg)
                    "Atom_modify map command after simulation box is defined");
       if (strcmp(arg[iarg+1],"array") == 0) map_user = 1;
       else if (strcmp(arg[iarg+1],"hash") == 0) map_user = 2;
+      else if (strcmp(arg[iarg+1],"yes") == 0) map_user = 3;
       else error->all(FLERR,"Illegal atom_modify command");
       map_style = map_user;
       iarg += 2;
diff --git a/src/atom_map.cpp b/src/atom_map.cpp
index bbfe014dec3ae6032ffe973fbb0fd282ef4af1b8..9d257d99de871f599afe709a399d83b08bfdda18 100644
--- a/src/atom_map.cpp
+++ b/src/atom_map.cpp
@@ -298,12 +298,12 @@ int Atom::map_style_set()
   MPI_Allreduce(&max,&map_tag_max,1,MPI_LMP_TAGINT,MPI_MAX,world);
 
   // set map_style for new map
-  // if user-selected, use that setting
+  // if user-selected to array/hash, use that setting
   // else if map_tag_max > 1M, use hash
   // else use array
 
   int map_style_old = map_style;
-  if (map_user) map_style = map_user;
+  if (map_user == 1 || map_user == 2) map_style = map_user;
   else if (map_tag_max > 1000000) map_style = 2;
   else map_style = 1;
 
diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp
index 3c972b82449377dcdd6c9dbe7c929ca2a584080c..06227b7a84cfeb2e7e230218778a4b60fca411cb 100644
--- a/src/comm_brick.cpp
+++ b/src/comm_brick.cpp
@@ -476,8 +476,7 @@ void CommBrick::forward_comm(int dummy)
     if (sendproc[iswap] != me) {
       if (comm_x_only) {
         if (size_forward_recv[iswap]) {
-          if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
-          else buf = NULL;
+          buf = x[firstrecv[iswap]];
           MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                     recvproc[iswap],0,world,&request);
         }
@@ -547,8 +546,7 @@ void CommBrick::reverse_comm()
           MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
                     sendproc[iswap],0,world,&request);
         if (size_reverse_send[iswap]) {
-          if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
-          else buf = NULL;
+          buf = f[firstrecv[iswap]];
           MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world);
         }
diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp
index 04a2df91f8b252586705d8418cc774afb4cc448c..992049a81f0d73d8182e393d2d1a2ac4343c6d35 100644
--- a/src/create_atoms.cpp
+++ b/src/create_atoms.cpp
@@ -343,6 +343,11 @@ void CreateAtoms::command(int narg, char **arg)
     }
   }
 
+  // Record wall time for atom creation
+
+  MPI_Barrier(world);
+  double time1 = MPI_Wtime();
+
   // clear ghost count and any ghost bonus data internal to AtomVec
   // same logic as beginning of Comm::exchange()
   // do it now b/c creating atoms will overwrite ghost atoms
@@ -509,6 +514,9 @@ void CreateAtoms::command(int narg, char **arg)
     if (domain->triclinic) domain->lamda2x(atom->nlocal);
   }
 
+  MPI_Barrier(world);
+  double time2 = MPI_Wtime();
+
   // clean up
 
   delete ranmol;
@@ -521,12 +529,16 @@ void CreateAtoms::command(int narg, char **arg)
   // print status
 
   if (comm->me == 0) {
-    if (screen)
+    if (screen) {
       fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
               atom->natoms-natoms_previous);
-    if (logfile)
+      fprintf(screen,"  Time spent = %g secs\n",time2-time1);
+    }
+    if (logfile) {
       fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
               atom->natoms-natoms_previous);
+      fprintf(logfile,"  Time spent = %g secs\n",time2-time1);
+    }
   }
 
   // for MOLECULE mode:
diff --git a/src/domain.cpp b/src/domain.cpp
index 427f7785e816c19d8d309abd35b8149113994691..bad503037322e1a51b2cd1a62eeb9290e943a54c 100644
--- a/src/domain.cpp
+++ b/src/domain.cpp
@@ -772,7 +772,7 @@ void Domain::image_check()
       delz = unwrap[i][2] - unwrap[k][2];
 
       if (xperiodic && delx > xprd_half) flag = 1;
-      if (xperiodic && dely > yprd_half) flag = 1;
+      if (yperiodic && dely > yprd_half) flag = 1;
       if (dimension == 3 && zperiodic && delz > zprd_half) flag = 1;
       if (!xperiodic && delx > xprd) flag = 1;
       if (!yperiodic && dely > yprd) flag = 1;
diff --git a/src/dump.cpp b/src/dump.cpp
index 44098298ba53918e9fb38b61a12ae97e46682eda..ddd958c25c6715c5f41512fb542280341839b8c1 100644
--- a/src/dump.cpp
+++ b/src/dump.cpp
@@ -238,7 +238,7 @@ void Dump::init()
     int gcmcflag = 0;
     for (int i = 0; i < modify->nfix; i++)
       if ((strcmp(modify->fix[i]->style,"gcmc") == 0))
-	gcmcflag = 1;
+        gcmcflag = 1;
 
     if (sortcol == 0 && atom->tag_consecutive() && !gcmcflag) {
       tagint *tag = atom->tag;
@@ -898,7 +898,7 @@ void Dump::modify_params(int narg, char **arg)
     } else if (strcmp(arg[iarg],"fileper") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command");
       if (!multiproc)
-	error->all(FLERR,"Cannot use dump_modify fileper "
+        error->all(FLERR,"Cannot use dump_modify fileper "
                    "without % in dump file name");
       int nper = force->inumeric(FLERR,arg[iarg+1]);
       if (nper <= 0) error->all(FLERR,"Illegal dump_modify command");
@@ -973,7 +973,7 @@ void Dump::modify_params(int narg, char **arg)
     } else if (strcmp(arg[iarg],"nfile") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command");
       if (!multiproc)
-	error->all(FLERR,"Cannot use dump_modify nfile "
+        error->all(FLERR,"Cannot use dump_modify nfile "
                    "without % in dump file name");
       int nfile = force->inumeric(FLERR,arg[iarg+1]);
       if (nfile <= 0) error->all(FLERR,"Illegal dump_modify command");
diff --git a/src/finish.cpp b/src/finish.cpp
index 45e9226388d30c4a6cd33fc1f91d75500e323d99..c22ecaae6053c828f896b6ebfae2b8ef471ceb90 100644
--- a/src/finish.cpp
+++ b/src/finish.cpp
@@ -130,7 +130,7 @@ void Finish::end(int flag)
                           atom->natoms);
       if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
                            atom->natoms);
-      
+
       // Gromacs/NAMD-style performance metric for suitable unit settings
 
       if ( timeflag && !minflag && !prdflag && !tadflag &&
@@ -144,7 +144,7 @@ void Finish::end(int flag)
         double one_fs = force->femtosecond;
         double t_step = ((double) time_loop) / ((double) update->nsteps);
         double step_t = 1.0/t_step;
-        
+
         if (strcmp(update->unit_style,"lj") == 0) {
           double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
           const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
@@ -161,7 +161,7 @@ void Finish::end(int flag)
       }
 
       // CPU use on MPI tasks and OpenMP threads
-      
+
       if (timeflag) {
         if (lmp->kokkos) {
           const char fmt2[] =
diff --git a/src/fix.h b/src/fix.h
index 3f32895309ac1cacf3d0e76ed2cebd5e59246179..21dfc955a87b85bed87c4703e2b44d4c6a252d5f 100644
--- a/src/fix.h
+++ b/src/fix.h
@@ -113,6 +113,7 @@ class Fix : protected Pointers {
   virtual void setup(int) {}
   virtual void setup_pre_exchange() {}
   virtual void setup_pre_neighbor() {}
+  virtual void setup_post_neighbor() {}
   virtual void setup_pre_force(int) {}
   virtual void setup_pre_reverse(int, int) {}
   virtual void min_setup(int) {}
@@ -120,6 +121,7 @@ class Fix : protected Pointers {
   virtual void post_integrate() {}
   virtual void pre_exchange() {}
   virtual void pre_neighbor() {}
+  virtual void post_neighbor() {}
   virtual void pre_force(int) {}
   virtual void pre_reverse(int,int) {}
   virtual void post_force(int) {}
@@ -155,6 +157,7 @@ class Fix : protected Pointers {
 
   virtual void min_pre_exchange() {}
   virtual void min_pre_neighbor() {}
+  virtual void min_post_neighbor() {}
   virtual void min_pre_force(int) {}
   virtual void min_pre_reverse(int,int) {}
   virtual void min_post_force(int) {}
@@ -244,25 +247,27 @@ namespace FixConst {
   static const int POST_INTEGRATE =          1<<1;
   static const int PRE_EXCHANGE =            1<<2;
   static const int PRE_NEIGHBOR =            1<<3;
-  static const int PRE_FORCE =               1<<4;
-  static const int PRE_REVERSE =             1<<5;
-  static const int POST_FORCE =              1<<6;
-  static const int FINAL_INTEGRATE =         1<<7;
-  static const int END_OF_STEP =             1<<8;
-  static const int POST_RUN =                1<<9;
-  static const int THERMO_ENERGY =           1<<10;
-  static const int INITIAL_INTEGRATE_RESPA = 1<<11;
-  static const int POST_INTEGRATE_RESPA =    1<<12;
-  static const int PRE_FORCE_RESPA =         1<<13;
-  static const int POST_FORCE_RESPA =        1<<14;
-  static const int FINAL_INTEGRATE_RESPA =   1<<15;
-  static const int MIN_PRE_EXCHANGE =        1<<16;
-  static const int MIN_PRE_NEIGHBOR =        1<<17;
-  static const int MIN_PRE_FORCE =           1<<18;
-  static const int MIN_PRE_REVERSE =         1<<19;
-  static const int MIN_POST_FORCE =          1<<20;
-  static const int MIN_ENERGY =              1<<21;
-  static const int FIX_CONST_LAST =          1<<22;
+  static const int POST_NEIGHBOR =           1<<4;
+  static const int PRE_FORCE =               1<<5;
+  static const int PRE_REVERSE =             1<<6;
+  static const int POST_FORCE =              1<<7;
+  static const int FINAL_INTEGRATE =         1<<8;
+  static const int END_OF_STEP =             1<<9;
+  static const int POST_RUN =                1<<10;
+  static const int THERMO_ENERGY =           1<<11;
+  static const int INITIAL_INTEGRATE_RESPA = 1<<12;
+  static const int POST_INTEGRATE_RESPA =    1<<13;
+  static const int PRE_FORCE_RESPA =         1<<14;
+  static const int POST_FORCE_RESPA =        1<<15;
+  static const int FINAL_INTEGRATE_RESPA =   1<<16;
+  static const int MIN_PRE_EXCHANGE =        1<<17;
+  static const int MIN_PRE_NEIGHBOR =        1<<18;
+  static const int MIN_POST_NEIGHBOR =       1<<19;
+  static const int MIN_PRE_FORCE =           1<<20;
+  static const int MIN_PRE_REVERSE =         1<<21;
+  static const int MIN_POST_FORCE =          1<<22;
+  static const int MIN_ENERGY =              1<<23;
+  static const int FIX_CONST_LAST =          1<<24;
 }
 
 }
diff --git a/src/fix_heat.cpp b/src/fix_heat.cpp
index 97e0ed6a7fd76662087d4db5415d9808ec535c39..846531dbb987eaa3c1870c4c5c20e1ba1448f82c 100644
--- a/src/fix_heat.cpp
+++ b/src/fix_heat.cpp
@@ -127,7 +127,7 @@ void FixHeat::init()
   }
 
   // check for rigid bodies in region (done here for performance reasons)
-  if (modify->check_rigid_region_overlap(groupbit,domain->regions[iregion]))
+  if (iregion >= 0 && modify->check_rigid_region_overlap(groupbit,domain->regions[iregion]))
     error->warning(FLERR,"Cannot apply fix heat to atoms in rigid bodies");
 
   // cannot have 0 atoms in group
diff --git a/src/fix_shear_history.cpp b/src/fix_neigh_history.cpp
similarity index 55%
rename from src/fix_shear_history.cpp
rename to src/fix_neigh_history.cpp
index 17e78830f4e1a7205ca10631792d66eefe0e4d12..322c8d55619abd0ef17b0a7788523f978261e53b 100644
--- a/src/fix_shear_history.cpp
+++ b/src/fix_neigh_history.cpp
@@ -14,7 +14,7 @@
 #include <mpi.h>
 #include <string.h>
 #include <stdio.h>
-#include "fix_shear_history.h"
+#include "fix_neigh_history.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
@@ -29,16 +29,16 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-enum{DEFAULT,NPARTNER,PERPARTNER};
+enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history/omp
 
 /* ---------------------------------------------------------------------- */
 
-FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) :
+FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg),
-  npartner(NULL), partner(NULL), shearpartner(NULL), pair(NULL), 
-  ipage(NULL), dpage(NULL)
+  npartner(NULL), partner(NULL), valuepartner(NULL), pair(NULL), 
+  ipage_atom(NULL), dpage_atom(NULL), ipage_neigh(NULL), dpage_neigh(NULL)
 {
-  if (narg != 4) error->all(FLERR,"Illegal fix SHEAR_HISTORY command");
+  if (narg != 4) error->all(FLERR,"Illegal fix NEIGH_HISTORY command");
 
   restart_peratom = 1;
   create_attribute = 1;
@@ -48,9 +48,12 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) :
   dnum = force->inumeric(FLERR,arg[3]);
   dnumbytes = dnum * sizeof(double);
 
+  zeroes = new double[dnum];
+  for (int i = 0; i < dnum; i++) zeroes[i] = 0.0;
+
   onesided = 0;
-  if (strcmp(id,"LINE_SHEAR_HISTORY") == 0) onesided = 1;
-  if (strcmp(id,"TRI_SHEAR_HISTORY") == 0) onesided = 1;
+  if (strcmp(id,"LINE_NEIGH_HISTORY") == 0) onesided = 1;
+  if (strcmp(id,"TRI_NEIGH_HISTORY") == 0) onesided = 1;
 
   if (newton_pair) comm_reverse = 1;   // just for single npartner value
                                        // variable-size history communicated via
@@ -65,11 +68,24 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) :
 
   pgsize = oneatom = 0;
 
+  // other per-atom vectors
+
+  firstflag = NULL;
+  firstvalue = NULL;
+  maxatom = 0;
+
+  // per-atom and per-neighbor data structs
+
+  ipage_atom = NULL;
+  dpage_atom = NULL;
+  ipage_neigh = NULL;
+  dpage_neigh = NULL;
+
   // initialize npartner to 0 so neighbor list creation is OK the 1st time
 
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) npartner[i] = 0;
-  maxtouch = 0;
+  maxpartner = 0;
 
   nlocal_neigh = nall_neigh = 0;
   commflag = DEFAULT;
@@ -77,7 +93,7 @@ FixShearHistory::FixShearHistory(LAMMPS *lmp, int narg, char **arg) :
 
 /* ---------------------------------------------------------------------- */
 
-FixShearHistory::~FixShearHistory()
+FixNeighHistory::~FixNeighHistory()
 {
   // unregister this fix so atom class doesn't invoke it any more
 
@@ -86,86 +102,111 @@ FixShearHistory::~FixShearHistory()
 
   // delete locally stored arrays
 
+  delete [] zeroes;
+
+  memory->sfree(firstflag);
+  memory->sfree(firstvalue);
+
   memory->destroy(npartner);
   memory->sfree(partner);
-  memory->sfree(shearpartner);
+  memory->sfree(valuepartner);
+
+  delete [] ipage_atom;
+  delete [] dpage_atom;
+  delete [] ipage_neigh;
+  delete [] dpage_neigh;
 
   // to better detect use-after-delete errors
 
+  firstflag = NULL;
+  firstvalue = NULL;
+
   pair = NULL;
   npartner = NULL;
   partner = NULL;
-  shearpartner = NULL;
-
-  delete [] ipage;
-  delete [] dpage;
+  valuepartner = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
-int FixShearHistory::setmask()
+int FixNeighHistory::setmask()
 {
   int mask = 0;
   mask |= PRE_EXCHANGE;
   mask |= MIN_PRE_EXCHANGE;
+  mask |= POST_NEIGHBOR;
+  mask |= MIN_POST_NEIGHBOR;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void FixShearHistory::init()
+void FixNeighHistory::init()
 {
   if (atom->tag_enable == 0)
-    error->all(FLERR,"Granular shear history requires atoms have IDs");
+    error->all(FLERR,"Neighbor history requires atoms have IDs");
 
   allocate_pages();
 }
 
 /* ----------------------------------------------------------------------
    create pages if first time or if neighbor pgsize/oneatom has changed
-   note that latter could cause shear history info to be discarded
+   note that latter could cause neighbor history info to be discarded
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::allocate_pages()
+void FixNeighHistory::allocate_pages()
 {
   int create = 0;
-  if (ipage == NULL) create = 1;
+  if (ipage_atom == NULL) create = 1;
   if (pgsize != neighbor->pgsize) create = 1;
   if (oneatom != neighbor->oneatom) create = 1;
 
   if (create) {
-    delete [] ipage;
-    delete [] dpage;
+    delete [] ipage_atom;
+    delete [] dpage_atom;
+    delete [] ipage_neigh;
+    delete [] dpage_neigh;
 
     pgsize = neighbor->pgsize;
     oneatom = neighbor->oneatom;
     int nmypage = comm->nthreads;
-    ipage = new MyPage<tagint>[nmypage];
-    dpage = new MyPage<double>[nmypage];
+    ipage_atom = new MyPage<tagint>[nmypage];
+    dpage_atom = new MyPage<double>[nmypage];
+    ipage_neigh = new MyPage<int>[nmypage];
+    dpage_neigh = new MyPage<double>[nmypage];
     for (int i = 0; i < nmypage; i++) {
-      ipage[i].init(oneatom,pgsize);
-      dpage[i].init(dnum*oneatom,dnum*pgsize);
+      ipage_atom[i].init(oneatom,pgsize);
+      dpage_atom[i].init(dnum*oneatom,dnum*pgsize);
+      ipage_neigh[i].init(oneatom,pgsize);
+      dpage_neigh[i].init(dnum*oneatom,dnum*pgsize);
     }
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
+void FixNeighHistory::setup_post_neighbor()
+{
+  post_neighbor();
+}
+
 /* ----------------------------------------------------------------------
-   copy shear partner info from neighbor lists to atom arrays
-   should be called whenever neighbor list stores current history info
-     and need to store the info with owned atoms
-   e.g. so atoms can migrate to new procs or between runs
-     when atoms may be added or deleted (neighbor list becomes out-of-date)
-   the next granular neigh list build will put this info back into neigh list
+   copy partner info from neighbor data structs (NDS) to atom arrays
+   should be called whenever NDS store current history info
+     and need to transfer the info to owned atoms
+   e.g. when atoms migrate to new procs, new neigh list built, or between runs
+     when atoms may be added or deleted (NDS becomes out-of-date)
+   the next post_neighbor() will put this info back into new NDS
    called during run before atom exchanges, including for restart files
    called at end of run via post_run()
    do not call during setup of run (setup_pre_exchange)
-     b/c there is no guarantee of a current neigh list (even on continued run)
+     b/c there is no guarantee of a current NDS (even on continued run)
    if run command does a 2nd run with pre = no, then no neigh list
      will be built, but old neigh list will still have the info
    onesided and newton on and newton off versions
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::pre_exchange()
+void FixNeighHistory::pre_exchange()
 {
   if (onesided) pre_exchange_onesided();
   else if (newton_pair) pre_exchange_newton();
@@ -178,60 +219,57 @@ void FixShearHistory::pre_exchange()
    only store history info with spheres
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::pre_exchange_onesided()
+void FixNeighHistory::pre_exchange_onesided()
 {
   int i,j,ii,jj,m,n,inum,jnum;
   int *ilist,*jlist,*numneigh,**firstneigh;
-  int *touch,**firsttouch;
-  double *shear,*allshear,**firstshear;
+  int *allflags;
+  double *allvalues,*onevalues;
 
   // NOTE: all operations until very end are with nlocal_neigh <= current nlocal
   // b/c previous neigh list was built with nlocal_neigh
   // nlocal can be larger if other fixes added atoms at this pre_exchange()
 
-  // zero npartner for owned atoms
-  // clear 2 page data structures
-
-  for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
+  // clear two paged data structures
 
-  ipage->reset();
-  dpage->reset();
+  ipage_atom->reset();
+  dpage_atom->reset();
 
   // 1st loop over neighbor list, I = sphere, J = tri
   // only calculate npartner for owned spheres
 
+  for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
+
   tagint *tag = atom->tag;
   NeighList *list = pair->list;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = list->listhistory->firstneigh;
-  firstshear = list->listhistory->firstdouble;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
 
     for (jj = 0; jj < jnum; jj++)
-      if (touch[jj]) npartner[i]++;
+      if (allflags[jj]) npartner[i]++;
   }
 
-  // get page chunks to store atom IDs and shear history for owned atoms
+  // get page chunks to store partner IDs and values for owned atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     n = npartner[i];
-    partner[i] = ipage->get(n);
-    shearpartner[i] = dpage->get(dnum*n);
-    if (partner[i] == NULL || shearpartner[i] == NULL)
-      error->one(FLERR,"Shear history overflow, boost neigh_modify one");
+    partner[i] = ipage_atom->get(n);
+    valuepartner[i] = dpage_atom->get(dnum*n);
+    if (partner[i] == NULL || valuepartner[i] == NULL)
+      error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
   }
 
   // 2nd loop over neighbor list, I = sphere, J = tri
-  // store atom IDs and shear history for owned spheres
+  // store partner IDs and values for owned atoms
   // re-zero npartner to use as counter
 
   for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
@@ -239,28 +277,28 @@ void FixShearHistory::pre_exchange_onesided()
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
-    allshear = firstshear[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
+    allvalues = firstvalue[i];
 
     for (jj = 0; jj < jnum; jj++) {
-      if (touch[jj]) {
-        shear = &allshear[dnum*jj];
+      if (allflags[jj]) {
+        onevalues = &allvalues[dnum*jj];
         j = jlist[jj];
         j &= NEIGHMASK;
         m = npartner[i]++;
         partner[i][m] = tag[j];
-        memcpy(&shearpartner[i][dnum*m],shear,dnumbytes);
+        memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
       }
     }
   }
 
-  // set maxtouch = max # of partners of any owned atom
+  // set maxpartner = max # of partners of any owned atom
   // bump up comm->maxexchange_fix if necessary
   
-  maxtouch = 0;
-  for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]);
-  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1);
+  maxpartner = 0;
+  for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]);
+  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
 
   // zero npartner values from previous nlocal_neigh to current nlocal
 
@@ -269,50 +307,47 @@ void FixShearHistory::pre_exchange_onesided()
 }
 
 /* ----------------------------------------------------------------------
-   newton on version, for sphere/sphere contacts
-   performs reverse comm to acquire shear partner info from ghost atoms
+   newton ON version
+   performs reverse comm to acquire partner values from ghost atoms
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::pre_exchange_newton()
+void FixNeighHistory::pre_exchange_newton()
 {
   int i,j,ii,jj,m,n,inum,jnum;
   int *ilist,*jlist,*numneigh,**firstneigh;
-  int *touch,**firsttouch;
-  double *shear,*shearj,*allshear,**firstshear;
+  int *allflags;
+  double *allvalues,*onevalues,*jvalues;
 
   // NOTE: all operations until very end are with 
   //   nlocal_neigh  <= current nlocal and nall_neigh
-  // b/c previous neigh list was built with nlocal_neigh,nghost_neigh
+  // b/c previous neigh list was built with nlocal_neigh & nghost_neigh
   // nlocal can be larger if other fixes added atoms at this pre_exchange()
 
-  // zero npartner for owned+ghost atoms
-  // clear 2 page data structures
-
-  for (i = 0; i < nall_neigh; i++) npartner[i] = 0;
+  // clear two paged data structures
 
-  ipage->reset();
-  dpage->reset();
+  ipage_atom->reset();
+  dpage_atom->reset();
 
   // 1st loop over neighbor list
   // calculate npartner for owned+ghost atoms
 
+  for (i = 0; i < nall_neigh; i++) npartner[i] = 0;
+
   tagint *tag = atom->tag;
   NeighList *list = pair->list;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = list->listhistory->firstneigh;
-  firstshear = list->listhistory->firstdouble;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
 
     for (jj = 0; jj < jnum; jj++) {
-      if (touch[jj]) {
+      if (allflags[jj]) {
         npartner[i]++;
         j = jlist[jj];
         j &= NEIGHMASK;
@@ -326,29 +361,29 @@ void FixShearHistory::pre_exchange_newton()
   commflag = NPARTNER;
   comm->reverse_comm_fix(this,0);
 
-  // get page chunks to store atom IDs and shear history for owned+ghost atoms
+  // get page chunks to store partner IDs and values for owned+ghost atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     n = npartner[i];
-    partner[i] = ipage->get(n);
-    shearpartner[i] = dpage->get(dnum*n);
-    if (partner[i] == NULL || shearpartner[i] == NULL) {
-      error->one(FLERR,"Shear history overflow, boost neigh_modify one");
+    partner[i] = ipage_atom->get(n);
+    valuepartner[i] = dpage_atom->get(dnum*n);
+    if (partner[i] == NULL || valuepartner[i] == NULL) {
+      error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
     }
   }
 
   for (i = nlocal_neigh; i < nall_neigh; i++) {
     n = npartner[i];
-    partner[i] = ipage->get(n);
-    shearpartner[i] = dpage->get(dnum*n);
-    if (partner[i] == NULL || shearpartner[i] == NULL) {
-      error->one(FLERR,"Shear history overflow, boost neigh_modify one");
+    partner[i] = ipage_atom->get(n);
+    valuepartner[i] = dpage_atom->get(dnum*n);
+    if (partner[i] == NULL || valuepartner[i] == NULL) {
+      error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
     }
   }
 
   // 2nd loop over neighbor list
-  // store atom IDs and shear history for owned+ghost atoms
+  // store partner IDs and values for owned+ghost atoms
   // re-zero npartner to use as counter
 
   for (i = 0; i < nall_neigh; i++) npartner[i] = 0;
@@ -356,40 +391,40 @@ void FixShearHistory::pre_exchange_newton()
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
-    allshear = firstshear[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
+    allvalues = firstvalue[i];
 
     for (jj = 0; jj < jnum; jj++) {
-      if (touch[jj]) {
-        shear = &allshear[dnum*jj];
+      if (allflags[jj]) {
+        onevalues = &allvalues[dnum*jj];
         j = jlist[jj];
         j &= NEIGHMASK;
         m = npartner[i]++;
         partner[i][m] = tag[j];
-        memcpy(&shearpartner[i][dnum*m],shear,dnumbytes);
+        memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
         m = npartner[j]++;
         partner[j][m] = tag[i];
-        shearj = &shearpartner[j][dnum*m];
-        for (n = 0; n < dnum; n++) shearj[n] = -shear[n];
+        jvalues = &valuepartner[j][dnum*m];
+        for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n];
       }
     }
   }
 
   // perform reverse comm to augment
-  // owned atom partner/shearpartner with ghost info
+  // owned atom partner/valuepartner with ghost info
   // use variable variant b/c size of packed data can be arbitrarily large
   //   if many touching neighbors for large particle
 
   commflag = PERPARTNER;
   comm->reverse_comm_fix_variable(this);
 
-  // set maxtouch = max # of partners of any owned atom
+  // set maxpartner = max # of partners of any owned atom
   // bump up comm->maxexchange_fix if necessary
 
-  maxtouch = 0;
-  for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]);
-  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1);
+  maxpartner = 0;
+  for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]);
+  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
 
   // zero npartner values from previous nlocal_neigh to current nlocal
 
@@ -398,49 +433,47 @@ void FixShearHistory::pre_exchange_newton()
 }
 
 /* ----------------------------------------------------------------------
-   newton off version, for sphere/sphere contacts
-   newton OFF works with smaller vectors that don't include ghost info
+   newton OFF version
+   do not need partner values from ghost atoms
+   assume J values are negative of I values
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::pre_exchange_no_newton()
+void FixNeighHistory::pre_exchange_no_newton()
 {
   int i,j,ii,jj,m,n,inum,jnum;
   int *ilist,*jlist,*numneigh,**firstneigh;
-  int *touch,**firsttouch;
-  double *shear,*shearj,*allshear,**firstshear;
+  int *allflags;
+  double *allvalues,*onevalues,*jvalues;
 
   // NOTE: all operations until very end are with nlocal_neigh <= current nlocal
   // b/c previous neigh list was built with nlocal_neigh
   // nlocal can be larger if other fixes added atoms at this pre_exchange()
 
-  // zero npartner for owned atoms
-  // clear 2 page data structures
-
-  for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
+  // clear two paged data structures
 
-  ipage->reset();
-  dpage->reset();
+  ipage_atom->reset();
+  dpage_atom->reset();
 
   // 1st loop over neighbor list
   // calculate npartner for owned atoms
 
+  for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
+
   tagint *tag = atom->tag;
   NeighList *list = pair->list;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  firsttouch = list->listhistory->firstneigh;
-  firstshear = list->listhistory->firstdouble;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
 
     for (jj = 0; jj < jnum; jj++) {
-      if (touch[jj]) {
+      if (allflags[jj]) {
         npartner[i]++;
         j = jlist[jj];
         j &= NEIGHMASK;
@@ -449,19 +482,19 @@ void FixShearHistory::pre_exchange_no_newton()
     }
   }
 
-  // get page chunks to store atom IDs and shear history for owned atoms
+  // get page chunks to store partner IDs and values for owned atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     n = npartner[i];
-    partner[i] = ipage->get(n);
-    shearpartner[i] = dpage->get(dnum*n);
-    if (partner[i] == NULL || shearpartner[i] == NULL)
-      error->one(FLERR,"Shear history overflow, boost neigh_modify one");
+    partner[i] = ipage_atom->get(n);
+    valuepartner[i] = dpage_atom->get(dnum*n);
+    if (partner[i] == NULL || valuepartner[i] == NULL)
+      error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
   }
 
   // 2nd loop over neighbor list
-  // store atom IDs and shear history for owned atoms
+  // store partner IDs and values for owned atoms
   // re-zero npartner to use as counter
 
   for (i = 0; i < nlocal_neigh; i++) npartner[i] = 0;
@@ -469,34 +502,34 @@ void FixShearHistory::pre_exchange_no_newton()
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
-    allshear = firstshear[i];
     jnum = numneigh[i];
-    touch = firsttouch[i];
+    allflags = firstflag[i];
+    allvalues = firstvalue[i];
 
     for (jj = 0; jj < jnum; jj++) {
-      if (touch[jj]) {
-        shear = &allshear[dnum*jj];
+      if (allflags[jj]) {
+        onevalues = &allvalues[dnum*jj];
         j = jlist[jj];
         j &= NEIGHMASK;
         m = npartner[i]++;
         partner[i][m] = tag[j];
-        memcpy(&shearpartner[i][dnum*m],shear,dnumbytes);
+        memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
         if (j < nlocal_neigh) {
           m = npartner[j]++;
           partner[j][m] = tag[i];
-          shearj = &shearpartner[j][dnum*m];
-          for (n = 0; n < dnum; n++) shearj[n] = -shear[n];
+          jvalues = &valuepartner[j][dnum*m];
+          for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n];
         }
       }
     }
   }
 
-  // set maxtouch = max # of partners of any owned atom
+  // set maxpartner = max # of partners of any owned atom
   // bump up comm->maxexchange_fix if necessary
   
-  maxtouch = 0;
-  for (i = 0; i < nlocal_neigh; i++) maxtouch = MAX(maxtouch,npartner[i]);
-  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxtouch+1);
+  maxpartner = 0;
+  for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]);
+  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
 
   // zero npartner values from previous nlocal_neigh to current nlocal
 
@@ -506,14 +539,107 @@ void FixShearHistory::pre_exchange_no_newton()
 
 /* ---------------------------------------------------------------------- */
 
-void FixShearHistory::min_pre_exchange()
+void FixNeighHistory::min_pre_exchange()
 {
   pre_exchange();
 }
 
+/* ----------------------------------------------------------------------
+   called after neighbor list is built
+   recover history info stored temporarily in per-atom partner lists
+     and store afresh in per-neighbor firstflag and firstvalue lists
+------------------------------------------------------------------------- */
+
+void FixNeighHistory::post_neighbor()
+{
+  int i,j,m,ii,jj,nn,np,inum,jnum,rflag;
+  tagint jtag;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int *allflags;
+  double *allvalues;
+
+  // store atom counts used for new neighbor list which was just built
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+  nlocal_neigh = nlocal;
+  nall_neigh = nall;
+
+  // realloc firstflag and firstvalue if needed
+
+  if (maxatom < nlocal) {
+    memory->sfree(firstflag);
+    memory->sfree(firstvalue);
+    maxatom = nall;
+    firstflag = (int **) 
+      memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag");
+    firstvalue = (double **) 
+      memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue");
+  }
+
+  // loop over newly built neighbor list
+  // repopulate entire per-neighbor data structs
+  //   whether with old-neigh partner info or zeroes
+
+  ipage_neigh->reset();
+  dpage_neigh->reset();
+
+  tagint *tag = atom->tag;
+  NeighList *list = pair->list;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    firstflag[i] = allflags = ipage_neigh->get(jnum);
+    firstvalue[i] = allvalues = dpage_neigh->get(jnum*dnum);
+    np = npartner[i];
+    nn = 0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      rflag = sbmask(j);
+      j &= NEIGHMASK;
+      jlist[jj] = j;
+
+      // rflag = 1 if r < radsum in npair_size() method
+      // preserve neigh history info if tag[j] is in old-neigh partner list
+      // this test could be more geometrically precise for two sphere/line/tri
+
+      if (rflag) {
+        jtag = tag[j];
+        for (m = 0; m < np; m++)
+          if (partner[i][m] == jtag) break;
+        if (m < np) {
+          allflags[jj] = 1;
+          memcpy(&allvalues[nn],&valuepartner[i][dnum*m],dnumbytes);
+        } else {
+          allflags[jj] = 0;
+          memcpy(&allvalues[nn],zeroes,dnumbytes);
+        }
+      } else {
+        allflags[jj] = 0;
+        memcpy(&allvalues[nn],zeroes,dnumbytes);
+      }
+      nn += dnum;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNeighHistory::min_post_neighbor()
+{
+  post_neighbor();
+}
+
 /* ---------------------------------------------------------------------- */
 
-void FixShearHistory::post_run()
+void FixNeighHistory::post_run()
 {
   pre_exchange();
 }
@@ -522,17 +648,21 @@ void FixShearHistory::post_run()
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
-double FixShearHistory::memory_usage()
+double FixNeighHistory::memory_usage()
 {
   int nmax = atom->nmax;
-  double bytes = nmax * sizeof(int);
-  bytes += nmax * sizeof(tagint *);
-  bytes += nmax * sizeof(double *);
+  double bytes = nmax * sizeof(int);    // npartner
+  bytes += nmax * sizeof(tagint *);     // partner
+  bytes += nmax * sizeof(double *);     // valuepartner
+  bytes += maxatom * sizeof(int *);     // firstflag
+  bytes += maxatom * sizeof(double *);  // firstvalue
 
   int nmypage = comm->nthreads;
   for (int i = 0; i < nmypage; i++) {
-    bytes += ipage[i].size();
-    bytes += dpage[i].size();
+    bytes += ipage_atom[i].size();
+    bytes += dpage_atom[i].size();
+    bytes += ipage_neigh[i].size();
+    bytes += dpage_neigh[i].size();
   }
 
   return bytes;
@@ -542,38 +672,38 @@ double FixShearHistory::memory_usage()
    allocate local atom-based arrays
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::grow_arrays(int nmax)
+void FixNeighHistory::grow_arrays(int nmax)
 {
-  memory->grow(npartner,nmax,"shear_history:npartner");
+  memory->grow(npartner,nmax,"neighbor_history:npartner");
   partner = (tagint **) memory->srealloc(partner,nmax*sizeof(tagint *),
-                                         "shear_history:partner");
-  shearpartner = (double **) memory->srealloc(shearpartner,
+                                         "neighbor_history:partner");
+  valuepartner = (double **) memory->srealloc(valuepartner,
                                               nmax*sizeof(double *),
-                                              "shear_history:shearpartner");
+                                              "neighbor_history:valuepartner");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based arrays
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::copy_arrays(int i, int j, int delflag)
+void FixNeighHistory::copy_arrays(int i, int j, int delflag)
 {
-  // just copy pointers for partner and shearpartner
-  // b/c can't overwrite chunk allocation inside ipage,dpage
+  // just copy pointers for partner and valuepartner
+  // b/c can't overwrite chunk allocation inside ipage_atom,dpage_atom
   // incoming atoms in unpack_exchange just grab new chunks
   // so are orphaning chunks for migrating atoms
-  // OK, b/c will reset ipage,dpage on next reneighboring
+  // OK, b/c will reset ipage_atom,dpage_atom on next reneighboring
 
   npartner[j] = npartner[i];
   partner[j] = partner[i];
-  shearpartner[j] = shearpartner[i];
+  valuepartner[j] = valuepartner[i];
 }
 
 /* ----------------------------------------------------------------------
    initialize one atom's array values, called when atom is created
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::set_arrays(int i)
+void FixNeighHistory::set_arrays(int i)
 {
   npartner[i] = 0;
 }
@@ -582,7 +712,7 @@ void FixShearHistory::set_arrays(int i)
    only called by Comm::reverse_comm_fix_variable for PERPARTNER mode
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::pack_reverse_comm_size(int n, int first)
+int FixNeighHistory::pack_reverse_comm_size(int n, int first)
 {
   int i,last;
 
@@ -590,7 +720,7 @@ int FixShearHistory::pack_reverse_comm_size(int n, int first)
   last = first + n;
 
   for (i = first; i < last; i++)
-    m += 1 + (dnum+1)*npartner[i];
+    m += 1 + (dnum+1)*npartner[i];
 
   return m;
 }
@@ -599,7 +729,7 @@ int FixShearHistory::pack_reverse_comm_size(int n, int first)
    two modes: NPARTNER and PERPARTNER
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::pack_reverse_comm(int n, int first, double *buf)
+int FixNeighHistory::pack_reverse_comm(int n, int first, double *buf)
 {
   int i,k,last;
 
@@ -615,11 +745,11 @@ int FixShearHistory::pack_reverse_comm(int n, int first, double *buf)
       buf[m++] = npartner[i];
       for (k = 0; k < npartner[i]; k++) {
         buf[m++] = partner[i][k];
-        memcpy(&buf[m],&shearpartner[i][dnum*k],dnumbytes);
+        memcpy(&buf[m],&valuepartner[i][dnum*k],dnumbytes);
         m += dnum;
       }
     }
-  } else error->all(FLERR,"Unsupported comm mode in shear history");
+  } else error->all(FLERR,"Unsupported comm mode in neighbor history");
 
   return m;
 }
@@ -628,7 +758,7 @@ int FixShearHistory::pack_reverse_comm(int n, int first, double *buf)
    two modes: NPARTNER and PERPARTNER
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::unpack_reverse_comm(int n, int *list, double *buf)
+void FixNeighHistory::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int i,j,k,kk,ncount;
 
@@ -646,18 +776,18 @@ void FixShearHistory::unpack_reverse_comm(int n, int *list, double *buf)
       for (k = 0; k < ncount; k++) {
         kk = npartner[j]++;
         partner[j][kk] = static_cast<tagint> (buf[m++]);
-        memcpy(&shearpartner[j][dnum*kk],&buf[m],dnumbytes);
+        memcpy(&valuepartner[j][dnum*kk],&buf[m],dnumbytes);
         m += dnum;
       }
     }
-  } else error->all(FLERR,"Unsupported comm mode in shear history");
+  } else error->all(FLERR,"Unsupported comm mode in neighbor history");
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for exchange with another proc
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::pack_exchange(int i, double *buf)
+int FixNeighHistory::pack_exchange(int i, double *buf)
 {
   // NOTE: how do I know comm buf is big enough if extreme # of touching neighs
   // Comm::BUFEXTRA may need to be increased
@@ -666,7 +796,7 @@ int FixShearHistory::pack_exchange(int i, double *buf)
   buf[m++] = npartner[i];
   for (int n = 0; n < npartner[i]; n++) {
     buf[m++] = partner[i][n];
-    memcpy(&buf[m],&shearpartner[i][dnum*n],dnumbytes);
+    memcpy(&buf[m],&valuepartner[i][dnum*n],dnumbytes);
     m += dnum;
   }
   return m;
@@ -676,18 +806,18 @@ int FixShearHistory::pack_exchange(int i, double *buf)
    unpack values in local atom-based arrays from exchange with another proc
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::unpack_exchange(int nlocal, double *buf)
+int FixNeighHistory::unpack_exchange(int nlocal, double *buf)
 {
-  // allocate new chunks from ipage,dpage for incoming values
+  // allocate new chunks from ipage_atom,dpage_atom for incoming values
 
   int m = 0;
   npartner[nlocal] = static_cast<int> (buf[m++]);
-  maxtouch = MAX(maxtouch,npartner[nlocal]);
-  partner[nlocal] = ipage->get(npartner[nlocal]);
-  shearpartner[nlocal] = dpage->get(dnum*npartner[nlocal]);
+  maxpartner = MAX(maxpartner,npartner[nlocal]);
+  partner[nlocal] = ipage_atom->get(npartner[nlocal]);
+  valuepartner[nlocal] = dpage_atom->get(dnum*npartner[nlocal]);
   for (int n = 0; n < npartner[nlocal]; n++) {
     partner[nlocal][n] = static_cast<tagint> (buf[m++]);
-    memcpy(&shearpartner[nlocal][dnum*n],&buf[m],dnumbytes);
+    memcpy(&valuepartner[nlocal][dnum*n],&buf[m],dnumbytes);
     m += dnum;
   }
   return m;
@@ -697,13 +827,13 @@ int FixShearHistory::unpack_exchange(int nlocal, double *buf)
    pack values in local atom-based arrays for restart file
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::pack_restart(int i, double *buf)
+int FixNeighHistory::pack_restart(int i, double *buf)
 {
   int m = 1;
   buf[m++] = npartner[i];
   for (int n = 0; n < npartner[i]; n++) {
     buf[m++] = partner[i][n];
-    memcpy(&buf[m],&shearpartner[i][dnum*n],dnumbytes);
+    memcpy(&buf[m],&valuepartner[i][dnum*n],dnumbytes);
     m += dnum;
   }
   buf[0] = m;
@@ -714,11 +844,11 @@ int FixShearHistory::pack_restart(int i, double *buf)
    unpack values from atom->extra array to restart the fix
 ------------------------------------------------------------------------- */
 
-void FixShearHistory::unpack_restart(int nlocal, int nth)
+void FixNeighHistory::unpack_restart(int nlocal, int nth)
 {
-  // ipage = NULL if being called from granular pair style init()
+  // ipage_atom = NULL if being called from granular pair style init()
 
-  if (ipage == NULL) allocate_pages();
+  if (ipage_atom == NULL) allocate_pages();
 
   // skip to Nth set of extra values
 
@@ -728,15 +858,15 @@ void FixShearHistory::unpack_restart(int nlocal, int nth)
   for (int i = 0; i < nth; i++) m += static_cast<int> (extra[nlocal][m]);
   m++;
 
-  // allocate new chunks from ipage,dpage for incoming values
+  // allocate new chunks from ipage_atom,dpage_atom for incoming values
 
   npartner[nlocal] = static_cast<int> (extra[nlocal][m++]);
-  maxtouch = MAX(maxtouch,npartner[nlocal]);
-  partner[nlocal] = ipage->get(npartner[nlocal]);
-  shearpartner[nlocal] = dpage->get(dnum*npartner[nlocal]);
+  maxpartner = MAX(maxpartner,npartner[nlocal]);
+  partner[nlocal] = ipage_atom->get(npartner[nlocal]);
+  valuepartner[nlocal] = dpage_atom->get(dnum*npartner[nlocal]);
   for (int n = 0; n < npartner[nlocal]; n++) {
     partner[nlocal][n] = static_cast<tagint> (extra[nlocal][m++]);
-    memcpy(&shearpartner[nlocal][dnum*n],&extra[nlocal][m],dnumbytes);
+    memcpy(&valuepartner[nlocal][dnum*n],&extra[nlocal][m],dnumbytes);
     m += dnum;
   }
 }
@@ -745,20 +875,20 @@ void FixShearHistory::unpack_restart(int nlocal, int nth)
    maxsize of any atom's restart data
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::maxsize_restart()
+int FixNeighHistory::maxsize_restart()
 {
-  // maxtouch_all = max # of touching partners across all procs
+  // maxpartner_all = max # of touching partners across all procs
 
-  int maxtouch_all;
-  MPI_Allreduce(&maxtouch,&maxtouch_all,1,MPI_INT,MPI_MAX,world);
-  return (dnum+1)*maxtouch_all + 2;
+  int maxpartner_all;
+  MPI_Allreduce(&maxpartner,&maxpartner_all,1,MPI_INT,MPI_MAX,world);
+  return (dnum+1)*maxpartner_all + 2;
 }
 
 /* ----------------------------------------------------------------------
    size of atom nlocal's restart data
 ------------------------------------------------------------------------- */
 
-int FixShearHistory::size_restart(int nlocal)
+int FixNeighHistory::size_restart(int nlocal)
 {
   return (dnum+1)*npartner[nlocal] + 2;
 }
diff --git a/src/fix_shear_history.h b/src/fix_neigh_history.h
similarity index 59%
rename from src/fix_shear_history.h
rename to src/fix_neigh_history.h
index 00f219f0340cd7f0422c3876bc32b1edeab2ec7d..7aed2d60357dfb81c0ff13ee357214f0c5dda4dc 100644
--- a/src/fix_shear_history.h
+++ b/src/fix_neigh_history.h
@@ -13,38 +13,35 @@
 
 #ifdef FIX_CLASS
 
-FixStyle(SHEAR_HISTORY,FixShearHistory)
+FixStyle(NEIGH_HISTORY,FixNeighHistory)
 
 #else
 
-#ifndef LMP_FIX_SHEAR_HISTORY_H
-#define LMP_FIX_SHEAR_HISTORY_H
+#ifndef LMP_FIX_NEIGH_HISTORY_H
+#define LMP_FIX_NEIGH_HISTORY_H
 
 #include "fix.h"
 #include "my_page.h"
 
 namespace LAMMPS_NS {
 
-class FixShearHistory : public Fix {
-  //friend class Neighbor;
-  //friend class PairGranHookeHistory;
-  friend class PairLineGranHookeHistory;
-  friend class PairTriGranHookeHistory;
-
+class FixNeighHistory : public Fix {
  public:
   int nlocal_neigh;             // nlocal at last time neigh list was built
   int nall_neigh;               // ditto for nlocal+nghost
-  int *npartner;                // # of touching partners of each atom
-  tagint **partner;             // global atom IDs for the partners
-  double **shearpartner;        // shear values with the partner
-  class Pair *pair;             // ptr to pair style that uses shear history
+  int **firstflag;              // ptr to each atom's neighbor flag
+  double **firstvalue;          // ptr to each atom's values
+  class Pair *pair;             // ptr to pair style that uses neighbor history
 
-  FixShearHistory(class LAMMPS *, int, char **);
-  ~FixShearHistory();
+  FixNeighHistory(class LAMMPS *, int, char **);
+  ~FixNeighHistory();
   int setmask();
   void init();
-  virtual void pre_exchange();
+  void setup_post_neighbor();
+  void pre_exchange();
   void min_pre_exchange();
+  virtual void post_neighbor();
+  void min_post_neighbor();
   void post_run();
 
   double memory_usage();
@@ -64,20 +61,40 @@ class FixShearHistory : public Fix {
 
  protected:
   int newton_pair;              // same as force setting
-  int dnum,dnumbytes;           // dnum = # of shear history values
+  int dnum,dnumbytes;           // dnum = # of values per neighbor
   int onesided;                 // 1 for line/tri history, else 0
 
-  int maxtouch;                 // max # of touching partners for my atoms
+  int maxatom;                  // max size of firstflag and firstvalue
   int commflag;                 // mode of reverse comm to get ghost info
+  double *zeroes;
+
+  // per-atom data structures
+  // partners = flagged neighbors of an atom
+
+  int *npartner;                // # of partners of each atom
+  tagint **partner;             // global atom IDs for the partners
+  double **valuepartner;        // values for the partners
+  int maxpartner;               // max # of partners for any of my atoms
+
+  // per-atom data structs pointed to by partner & valuepartner
 
   int pgsize,oneatom;           // copy of settings in Neighbor
-  MyPage<tagint> *ipage;        // pages of partner atom IDs
-  MyPage<double> *dpage;        // pages of shear history with partners
+  MyPage<tagint> *ipage_atom;   // pages of partner atom IDs
+  MyPage<double> *dpage_atom;   // pages of partner values
+
+  // per-neighbor data structs pointed to by firstflag & firstvalue
 
-  void pre_exchange_onesided();
-  void pre_exchange_newton();
-  void pre_exchange_no_newton();
+  MyPage<int> *ipage_neigh;     // pages of local atom indices
+  MyPage<double> *dpage_neigh;  // pages of partner values
+
+  virtual void pre_exchange_onesided();
+  virtual void pre_exchange_newton();
+  virtual void pre_exchange_no_newton();
   void allocate_pages();
+
+  inline int sbmask(int j) const {
+    return j >> SBBITS & 3;
+  }
 };
 
 }
diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp
index c99c7a34bdf66ce9a40041689a8a9f30a5686518..f95bde95d06fff004d53988eef5f9da7e2ffe6e3 100644
--- a/src/fix_nh.cpp
+++ b/src/fix_nh.cpp
@@ -353,6 +353,15 @@ FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) :
     } else if (strcmp(arg[iarg],"disc") == 0) {
       iarg++;
 
+    // keywords erate, strain, and ext are also parsed in fix/nh/uef
+
+    } else if (strcmp(arg[iarg],"erate") == 0) {
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"strain") == 0) {
+      iarg += 3;
+    } else if (strcmp(arg[iarg],"ext") == 0) {
+      iarg += 2;
+
     } else error->all(FLERR,"Illegal fix nvt/npt/nph command");
   }
 
diff --git a/src/fix_nve.cpp b/src/fix_nve.cpp
index 8dd016024fd760fcf795e0394dc29c346da6a732..64ec3373f12c81137a7884c25f358bcc2e609174 100644
--- a/src/fix_nve.cpp
+++ b/src/fix_nve.cpp
@@ -58,7 +58,6 @@ void FixNVE::init()
     step_respa = ((Respa *) update->integrate)->step;
 }
 
-
 /* ----------------------------------------------------------------------
    allow for both per-type and per-atom mass
 ------------------------------------------------------------------------- */
diff --git a/src/input.cpp b/src/input.cpp
index 7d11b8741b976ba47678b8742ded9f1e06ef8bdd..23b89d3040d7816546d5d701609bf0cf87bcac05 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -18,7 +18,7 @@
 #include <errno.h>
 #include <ctype.h>
 #include <unistd.h>
-#include "sys/stat.h"
+#include <sys/stat.h>
 #include "input.h"
 #include "style_command.h"
 #include "universe.h"
diff --git a/src/main.cpp b/src/main.cpp
index 7401183fea1f79a2d2a0cbd3425f20d763c0f7c4..82dac5af6d3338afc7703b1f32974be4cd5e99f8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,6 +22,10 @@
 #include <fenv.h>
 #endif
 
+#ifdef FFT_FFTW3
+#include <fftw3.h>
+#endif
+
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
@@ -62,4 +66,14 @@ int main(int argc, char **argv)
 #endif
   MPI_Barrier(MPI_COMM_WORLD);
   MPI_Finalize();
+
+#ifdef FFT_FFTW3
+  // tell fftw3 to delete its global memory pool
+  // and thus avoid bogus valgrind memory leak reports
+  // FFT_SINGLE builds use the single-precision (fftwf_) API and library
+#ifdef FFT_SINGLE
+  fftwf_cleanup();
+#else
+  fftw_cleanup();
+#endif
+#endif
 }
diff --git a/src/min.cpp b/src/min.cpp
index af23629cad8eafefcaab22f5752ee8d87ea9014a..653cac71e6af5b3a52a00d808a47319242593def 100644
--- a/src/min.cpp
+++ b/src/min.cpp
@@ -246,6 +246,7 @@ void Min::setup(int flag)
   domain->box_too_small_check();
   modify->setup_pre_neighbor();
   neighbor->build();
+  modify->setup_post_neighbor();
   neighbor->ncalls = 0;
 
   // remove these restriction eventually
@@ -345,6 +346,7 @@ void Min::setup_minimal(int flag)
     domain->box_too_small_check();
     modify->setup_pre_neighbor();
     neighbor->build();
+    modify->setup_post_neighbor();
     neighbor->ncalls = 0;
   }
 
@@ -503,12 +505,15 @@ double Min::energy_force(int resetflag)
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     timer->stamp(Timer::COMM);
     if (modify->n_min_pre_neighbor) {
-      timer->stamp();
       modify->min_pre_neighbor();
       timer->stamp(Timer::MODIFY);
     }
     neighbor->build();
     timer->stamp(Timer::NEIGH);
+    if (modify->n_min_post_neighbor) {
+      modify->min_post_neighbor();
+      timer->stamp(Timer::MODIFY);
+    }
   }
 
   ev_set(update->ntimestep);
diff --git a/src/modify.cpp b/src/modify.cpp
index 4516788aa940df4c7e4b431c7a7fe1f0208f8dc8..64970f2cf98b10cbf9ad9f7042392d5e2bf827b6 100644
--- a/src/modify.cpp
+++ b/src/modify.cpp
@@ -42,7 +42,7 @@ Modify::Modify(LAMMPS *lmp) : Pointers(lmp)
 {
   nfix = maxfix = 0;
   n_initial_integrate = n_post_integrate = 0;
-  n_pre_exchange = n_pre_neighbor = 0;
+  n_pre_exchange = n_pre_neighbor = n_post_neighbor = 0;
   n_pre_force = n_pre_reverse = n_post_force = 0;
   n_final_integrate = n_end_of_step = n_thermo_energy = 0;
   n_thermo_energy_atom = 0;
@@ -54,14 +54,14 @@ Modify::Modify(LAMMPS *lmp) : Pointers(lmp)
   fix = NULL;
   fmask = NULL;
   list_initial_integrate = list_post_integrate = NULL;
-  list_pre_exchange = list_pre_neighbor = NULL;
+  list_pre_exchange = list_pre_neighbor = list_post_neighbor = NULL;
   list_pre_force = list_pre_reverse = list_post_force = NULL;
   list_final_integrate = list_end_of_step = NULL;
   list_thermo_energy = list_thermo_energy_atom = NULL;
   list_initial_integrate_respa = list_post_integrate_respa = NULL;
   list_pre_force_respa = list_post_force_respa = NULL;
   list_final_integrate_respa = NULL;
-  list_min_pre_exchange = list_min_pre_neighbor = NULL;
+  list_min_pre_exchange = list_min_pre_neighbor = list_min_post_neighbor = NULL;
   list_min_pre_force = list_min_pre_reverse = list_min_post_force = NULL;
   list_min_energy = NULL;
 
@@ -110,7 +110,7 @@ Modify::~Modify()
   // delete all fixes
   // do it via delete_fix() so callbacks in Atom are also updated correctly
 
-  while (nfix) delete_fix(fix[0]->id);
+  while (nfix) delete_fix(0);
   memory->sfree(fix);
   memory->destroy(fmask);
 
@@ -123,6 +123,7 @@ Modify::~Modify()
   delete [] list_post_integrate;
   delete [] list_pre_exchange;
   delete [] list_pre_neighbor;
+  delete [] list_post_neighbor;
   delete [] list_pre_force;
   delete [] list_pre_reverse;
   delete [] list_post_force;
@@ -137,6 +138,7 @@ Modify::~Modify()
   delete [] list_final_integrate_respa;
   delete [] list_min_pre_exchange;
   delete [] list_min_pre_neighbor;
+  delete [] list_min_post_neighbor;
   delete [] list_min_pre_force;
   delete [] list_min_pre_reverse;
   delete [] list_min_post_force;
@@ -169,6 +171,7 @@ void Modify::init()
   list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate);
   list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange);
   list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor);
+  list_init(POST_NEIGHBOR,n_post_neighbor,list_post_neighbor);
   list_init(PRE_FORCE,n_pre_force,list_pre_force);
   list_init(PRE_REVERSE,n_pre_reverse,list_pre_reverse);
   list_init(POST_FORCE,n_post_force,list_post_force);
@@ -190,6 +193,7 @@ void Modify::init()
 
   list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange);
   list_init(MIN_PRE_NEIGHBOR,n_min_pre_neighbor,list_min_pre_neighbor);
+  list_init(MIN_POST_NEIGHBOR,n_min_post_neighbor,list_min_post_neighbor);
   list_init(MIN_PRE_FORCE,n_min_pre_force,list_min_pre_force);
   list_init(MIN_PRE_REVERSE,n_min_pre_reverse,list_min_pre_reverse);
   list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force);
@@ -329,6 +333,21 @@ void Modify::setup_pre_neighbor()
       fix[list_min_pre_neighbor[i]]->setup_pre_neighbor();
 }
 
+/* ----------------------------------------------------------------------
+   setup post_neighbor call, only for fixes that define post_neighbor
+   called from Verlet, RESPA, Min
+------------------------------------------------------------------------- */
+
+void Modify::setup_post_neighbor()
+{
+  if (update->whichflag == 1)
+    for (int i = 0; i < n_post_neighbor; i++)
+      fix[list_post_neighbor[i]]->setup_post_neighbor();
+  else if (update->whichflag == 2)
+    for (int i = 0; i < n_min_post_neighbor; i++)
+      fix[list_min_post_neighbor[i]]->setup_post_neighbor();
+}
+
 /* ----------------------------------------------------------------------
    setup pre_force call, only for fixes that define pre_force
    called from Verlet, RESPA, Min
@@ -399,6 +418,16 @@ void Modify::pre_neighbor()
     fix[list_pre_neighbor[i]]->pre_neighbor();
 }
 
+/* ----------------------------------------------------------------------
+   post_neighbor call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void Modify::post_neighbor()
+{
+  for (int i = 0; i < n_post_neighbor; i++)
+    fix[list_post_neighbor[i]]->post_neighbor();
+}
+
 /* ----------------------------------------------------------------------
    pre_force call, only for relevant fixes
 ------------------------------------------------------------------------- */
@@ -589,6 +618,16 @@ void Modify::min_pre_neighbor()
     fix[list_min_pre_neighbor[i]]->min_pre_neighbor();
 }
 
+/* ----------------------------------------------------------------------
+   minimizer post-neighbor call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void Modify::min_post_neighbor()
+{
+  for (int i = 0; i < n_min_post_neighbor; i++)
+    fix[list_min_post_neighbor[i]]->min_post_neighbor();
+}
+
 /* ----------------------------------------------------------------------
    minimizer pre-force call, only for relevant fixes
 ------------------------------------------------------------------------- */
@@ -863,9 +902,9 @@ void Modify::add_fix(int narg, char **arg, int trysuffix)
       fix[ifix]->restart(state_restart_global[i]);
       used_restart_global[i] = 1;
       if (comm->me == 0) {
-	if (screen) 
+        if (screen)
           fprintf(screen,"Resetting global fix info from restart file:\n");
-	if (logfile) 
+        if (logfile)
           fprintf(logfile,"Resetting global fix info from restart file:\n");
         if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
                             fix[ifix]->style,fix[ifix]->id);
@@ -885,9 +924,9 @@ void Modify::add_fix(int narg, char **arg, int trysuffix)
         fix[ifix]->unpack_restart(j,index_restart_peratom[i]);
       fix[ifix]->restart_reset = 1;
       if (comm->me == 0) {
-	if (screen) 
+        if (screen)
           fprintf(screen,"Resetting peratom fix info from restart file:\n");
-	if (logfile) 
+        if (logfile)
           fprintf(logfile,"Resetting peratom fix info from restart file:\n");
         if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
                             fix[ifix]->style,fix[ifix]->id);
@@ -944,7 +983,12 @@ void Modify::delete_fix(const char *id)
 {
   int ifix = find_fix(id);
   if (ifix < 0) error->all(FLERR,"Could not find fix ID to delete");
-  delete fix[ifix];
+  delete_fix(ifix);
+}
+
+void Modify::delete_fix(int ifix)
+{
+  if (fix[ifix]) delete fix[ifix];
   atom->update_callback(ifix);
 
   // move other Fixes and fmask down in list one slot
@@ -1409,24 +1453,24 @@ void Modify::restart_deallocate(int flag)
     if (flag && comm->me == 0) {
       int i;
       for (i = 0; i < nfix_restart_global; i++)
-	if (used_restart_global[i] == 0) break;
+        if (used_restart_global[i] == 0) break;
       if (i == nfix_restart_global) {
-	if (screen) 
+        if (screen)
           fprintf(screen,"All restart file global fix info "
                   "was re-assigned\n");
-	if (logfile) 
+        if (logfile)
           fprintf(logfile,"All restart file global fix info "
                   "was re-assigned\n");
       } else {
-	if (screen) fprintf(screen,"Unused restart file global fix info:\n");
-	if (logfile) fprintf(logfile,"Unused restart file global fix info:\n");
-	for (i = 0; i < nfix_restart_global; i++) {
-	  if (used_restart_global[i]) continue;
-	  if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
-			      style_restart_global[i],id_restart_global[i]);
-	  if (logfile) fprintf(logfile,"  fix style: %s, fix ID: %s\n",
-			       style_restart_global[i],id_restart_global[i]);
-	}
+        if (screen) fprintf(screen,"Unused restart file global fix info:\n");
+        if (logfile) fprintf(logfile,"Unused restart file global fix info:\n");
+        for (i = 0; i < nfix_restart_global; i++) {
+          if (used_restart_global[i]) continue;
+          if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
+                              style_restart_global[i],id_restart_global[i]);
+          if (logfile) fprintf(logfile,"  fix style: %s, fix ID: %s\n",
+                               style_restart_global[i],id_restart_global[i]);
+        }
       }
     }
 
@@ -1445,24 +1489,24 @@ void Modify::restart_deallocate(int flag)
     if (flag && comm->me == 0) {
       int i;
       for (i = 0; i < nfix_restart_peratom; i++)
-	if (used_restart_peratom[i] == 0) break;
+        if (used_restart_peratom[i] == 0) break;
       if (i == nfix_restart_peratom) {
-	if (screen) 
+        if (screen)
           fprintf(screen,"All restart file peratom fix info "
                   "was re-assigned\n");
-	if (logfile) 
+        if (logfile)
           fprintf(logfile,"All restart file peratom fix info "
                   "was re-assigned\n");
       } else {
-	if (screen) fprintf(screen,"Unused restart file peratom fix info:\n");
-	if (logfile) fprintf(logfile,"Unused restart file peratom fix info:\n");
-	for (i = 0; i < nfix_restart_peratom; i++) {
-	  if (used_restart_peratom[i]) continue;
-	  if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
-			      style_restart_peratom[i],id_restart_peratom[i]);
-	  if (logfile) fprintf(logfile,"  fix style: %s, fix ID: %s\n",
-			       style_restart_peratom[i],id_restart_peratom[i]);
-	}
+        if (screen) fprintf(screen,"Unused restart file peratom fix info:\n");
+        if (logfile) fprintf(logfile,"Unused restart file peratom fix info:\n");
+        for (i = 0; i < nfix_restart_peratom; i++) {
+          if (used_restart_peratom[i]) continue;
+          if (screen) fprintf(screen,"  fix style: %s, fix ID: %s\n",
+                              style_restart_peratom[i],id_restart_peratom[i]);
+          if (logfile) fprintf(logfile,"  fix style: %s, fix ID: %s\n",
+                               style_restart_peratom[i],id_restart_peratom[i]);
+        }
       }
     }
 
diff --git a/src/modify.h b/src/modify.h
index d825d5c4efbf543a709e766253a0a7e600cf8dcf..3e20df5aac152affa32c8b7ff642ed621ffc2ba8 100644
--- a/src/modify.h
+++ b/src/modify.h
@@ -29,12 +29,13 @@ class Modify : protected Pointers {
 
  public:
   int nfix,maxfix;
-  int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor;
+  int n_initial_integrate,n_post_integrate,n_pre_exchange;
+  int n_pre_neighbor,n_post_neighbor;
   int n_pre_force,n_pre_reverse,n_post_force;
   int n_final_integrate,n_end_of_step,n_thermo_energy,n_thermo_energy_atom;
   int n_initial_integrate_respa,n_post_integrate_respa;
   int n_pre_force_respa,n_post_force_respa,n_final_integrate_respa;
-  int n_min_pre_exchange,n_min_pre_neighbor;
+  int n_min_pre_exchange,n_min_pre_neighbor,n_min_post_neighbor;
   int n_min_pre_force,n_min_pre_reverse,n_min_post_force,n_min_energy;
 
   int restart_pbc_any;       // 1 if any fix sets restart_pbc
@@ -53,12 +54,14 @@ class Modify : protected Pointers {
   virtual void setup(int);
   virtual void setup_pre_exchange();
   virtual void setup_pre_neighbor();
+  virtual void setup_post_neighbor();
   virtual void setup_pre_force(int);
   virtual void setup_pre_reverse(int, int);
   virtual void initial_integrate(int);
   virtual void post_integrate();
   virtual void pre_exchange();
   virtual void pre_neighbor();
+  virtual void post_neighbor();
   virtual void pre_force(int);
   virtual void pre_reverse(int,int);
   virtual void post_force(int);
@@ -78,6 +81,7 @@ class Modify : protected Pointers {
 
   virtual void min_pre_exchange();
   virtual void min_pre_neighbor();
+  virtual void min_post_neighbor();
   virtual void min_pre_force(int);
   virtual void min_pre_reverse(int,int);
   virtual void min_post_force(int);
@@ -95,6 +99,7 @@ class Modify : protected Pointers {
   void add_fix(int, char **, int trysuffix=1);
   void modify_fix(int, char **);
   void delete_fix(const char *);
+  void delete_fix(int);
   int find_fix(const char *);
   int find_fix_by_style(const char *);
   int check_package(const char *);
@@ -122,14 +127,14 @@ class Modify : protected Pointers {
   // lists of fixes to apply at different stages of timestep
 
   int *list_initial_integrate,*list_post_integrate;
-  int *list_pre_exchange,*list_pre_neighbor;
+  int *list_pre_exchange,*list_pre_neighbor,*list_post_neighbor;
   int *list_pre_force,*list_pre_reverse,*list_post_force;
   int *list_final_integrate,*list_end_of_step,*list_thermo_energy;
   int *list_thermo_energy_atom;
   int *list_initial_integrate_respa,*list_post_integrate_respa;
   int *list_pre_force_respa,*list_post_force_respa;
   int *list_final_integrate_respa;
-  int *list_min_pre_exchange,*list_min_pre_neighbor;
+  int *list_min_pre_exchange,*list_min_pre_neighbor,*list_min_post_neighbor;
   int *list_min_pre_force,*list_min_pre_reverse,*list_min_post_force;
   int *list_min_energy;
 
diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp
index dde544a69f181e669ca349e79b08e3ef101103d7..934b9f7d9b0409825efc3a51d269df88ba46a15b 100644
--- a/src/neigh_list.cpp
+++ b/src/neigh_list.cpp
@@ -40,16 +40,18 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp)
   ilist = NULL;
   numneigh = NULL;
   firstneigh = NULL;
-  firstdouble = NULL;
 
   // defaults, but may be reset by post_constructor()
 
   occasional = 0;
   ghost = 0;
   ssa = 0;
+  history = 0;
+  respaouter = 0;
+  respamiddle = 0;
+  respainner = 0;
   copy = 0;
   copymode = 0;
-  dnum = 0;
 
   // ptrs
 
@@ -60,17 +62,24 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp)
   listskip = NULL;
   listfull = NULL;
 
-  listhistory = NULL;
-  fix_history = NULL;
-
-  respamiddle = 0;
-  listinner = NULL;
-  listmiddle = NULL;
-
   fix_bond = NULL;
 
   ipage = NULL;
-  dpage = NULL;
+
+  // extra rRESPA lists
+
+  inum_inner = gnum_inner = 0;
+  ilist_inner = NULL;
+  numneigh_inner = NULL;
+  firstneigh_inner = NULL;
+
+  inum_middle = gnum_middle = 0;
+  ilist_middle = NULL;
+  numneigh_middle = NULL;
+  firstneigh_middle = NULL;
+
+  ipage_inner = NULL;
+  ipage_middle = NULL;
 
   // Kokkos package
 
@@ -92,10 +101,21 @@ NeighList::~NeighList()
     memory->destroy(ilist);
     memory->destroy(numneigh);
     memory->sfree(firstneigh);
-    memory->sfree(firstdouble);
-
     delete [] ipage;
-    delete [] dpage;
+  }
+
+  if (respainner) {
+    memory->destroy(ilist_inner);
+    memory->destroy(numneigh_inner);
+    memory->sfree(firstneigh_inner);
+    delete [] ipage_inner;
+  }
+
+  if (respamiddle) {
+    memory->destroy(ilist_middle);
+    memory->destroy(numneigh_middle);
+    memory->sfree(firstneigh_middle);
+    delete [] ipage_middle;
   }
 
   delete [] iskip;
@@ -108,8 +128,7 @@ NeighList::~NeighList()
    copy -> set listcopy for list to copy from
    skip -> set listskip for list to skip from, create copy of itype,ijtype
    halffull -> set listfull for full list to derive from
-   history -> set LH and FH ptrs in partner list that uses the history info
-   respaouter -> set listinner/listmiddle for other rRESPA lists
+   respaouter -> set all 3 outer/middle/inner flags
    bond -> set fix_bond to Fix that made the request
 ------------------------------------------------------------------------- */
 
@@ -120,8 +139,11 @@ void NeighList::post_constructor(NeighRequest *nq)
   occasional = nq->occasional;
   ghost = nq->ghost;
   ssa = nq->ssa;
+  history = nq->history;
+  respaouter = nq->respaouter;
+  respamiddle = nq->respamiddle;
+  respainner = nq->respainner;
   copy = nq->copy;
-  dnum = nq->dnum;
 
   if (nq->copy)
     listcopy = neighbor->lists[nq->copylist];
@@ -141,24 +163,6 @@ void NeighList::post_constructor(NeighRequest *nq)
   if (nq->halffull)
     listfull = neighbor->lists[nq->halffulllist];
 
-  if (nq->history) {
-    neighbor->lists[nq->historylist]->listhistory = this;
-    int tmp;
-    neighbor->lists[nq->historylist]->fix_history = 
-      (Fix *) ((Pair *) nq->requestor)->extract("history",tmp);
-  }
-  
-  if (nq->respaouter) {
-    if (nq->respamiddlelist < 0) {
-      respamiddle = 0;
-      listinner = neighbor->lists[nq->respainnerlist];
-    } else {
-      respamiddle = 1;
-      listmiddle = neighbor->lists[nq->respamiddlelist];
-      listinner = neighbor->lists[nq->respainnerlist];
-    }
-  }
-
   if (nq->bond) fix_bond = (Fix *) nq->requestor;
 }
 
@@ -174,32 +178,29 @@ void NeighList::setup_pages(int pgsize_caller, int oneatom_caller)
   for (int i = 0; i < nmypage; i++)
     ipage[i].init(oneatom,pgsize,PGDELTA);
 
-  if (dnum) {
-    dpage = new MyPage<double>[nmypage];
+  if (respainner) {
+    ipage_inner = new MyPage<int>[nmypage];
     for (int i = 0; i < nmypage; i++)
-      dpage[i].init(dnum*oneatom,dnum*pgsize,PGDELTA);
-  } else dpage = NULL;
+      ipage_inner[i].init(oneatom,pgsize,PGDELTA);
+  }
+
+  if (respamiddle) {
+    ipage_middle = new MyPage<int>[nmypage];
+    for (int i = 0; i < nmypage; i++)
+      ipage_middle[i].init(oneatom,pgsize,PGDELTA);
+  }
 }
 
 /* ----------------------------------------------------------------------
    grow per-atom data to allow for nlocal/nall atoms
-   for parent lists:
-     also trigger grow in child list(s) which are not built themselves
-     history calls grow() in listhistory
-     respaouter calls grow() in respainner, respamiddle
    triggered by neighbor list build
    not called if a copy list
 ------------------------------------------------------------------------- */
 
 void NeighList::grow(int nlocal, int nall)
 {
-  // trigger grow() in children before possible return
-
-  if (listhistory) listhistory->grow(nlocal,nall);
-  if (listinner) listinner->grow(nlocal,nall);
-  if (listmiddle) listmiddle->grow(nlocal,nall);
-
   // skip if data structs are already big enough
+
   if (ssa) {
     if ((nlocal * 3) + nall <= maxatom) return;
   } else if (ghost) {
@@ -218,10 +219,25 @@ void NeighList::grow(int nlocal, int nall)
   memory->create(numneigh,maxatom,"neighlist:numneigh");
   firstneigh = (int **) memory->smalloc(maxatom*sizeof(int *),
                                         "neighlist:firstneigh");
-  if (dnum) {
-    memory->sfree(firstdouble);
-    firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *),
-                                              "neighlist:firstdouble");
+
+  if (respainner) {
+    memory->destroy(ilist_inner);
+    memory->destroy(numneigh_inner);
+    memory->sfree(firstneigh_inner);
+    memory->create(ilist_inner,maxatom,"neighlist:ilist_inner");
+    memory->create(numneigh_inner,maxatom,"neighlist:numneigh_inner");
+    firstneigh_inner = (int **) memory->smalloc(maxatom*sizeof(int *),
+                                                "neighlist:firstneigh_inner");
+  }
+
+  if (respamiddle) {
+    memory->destroy(ilist_middle);
+    memory->destroy(numneigh_middle);
+    memory->sfree(firstneigh_middle);
+    memory->create(ilist_middle,maxatom,"neighlist:ilist_middle");
+    memory->create(numneigh_middle,maxatom,"neighlist:numneigh_middle");
+    firstneigh_middle = (int **) memory->smalloc(maxatom*sizeof(int *),
+                                                 "neighlist:firstneigh_middle");
   }
 }
 
@@ -253,22 +269,20 @@ void NeighList::print_attributes()
   printf("  %d = size\n",rq->size);
   printf("  %d = history\n",rq->history);
   printf("  %d = granonesided\n",rq->granonesided);
-  printf("  %d = respainner\n",rq->respainner);
-  printf("  %d = respamiddle\n",rq->respamiddle);
   printf("  %d = respaouter\n",rq->respaouter);
+  printf("  %d = respamiddle\n",rq->respamiddle);
+  printf("  %d = respainner\n",rq->respainner);
   printf("  %d = bond\n",rq->bond);
   printf("  %d = omp\n",rq->omp);
   printf("  %d = intel\n",rq->intel);
   printf("  %d = kokkos host\n",rq->kokkos_host);
   printf("  %d = kokkos device\n",rq->kokkos_device);
   printf("  %d = ssa flag\n",ssa);
-  printf("  %d = dnum\n",dnum);
   printf("\n");
   printf("  %d = skip flag\n",rq->skip);
   printf("  %d = off2on\n",rq->off2on);
   printf("  %d = copy flag\n",rq->copy);
   printf("  %d = half/full\n",rq->halffull);
-  printf("  %d = history/partner\n",rq->history_partner);
   printf("\n");
 }
 
@@ -292,10 +306,23 @@ bigint NeighList::memory_usage()
       bytes += ipage[i].size();
   }
 
-  if (dnum && dpage) {
-    for (int i = 0; i < nmypage; i++) {
-      bytes += maxatom * sizeof(double *);
-      bytes += dpage[i].size();
+  if (respainner) {
+    bytes += memory->usage(ilist_inner,maxatom);
+    bytes += memory->usage(numneigh_inner,maxatom);
+    bytes += maxatom * sizeof(int *);
+    if (ipage_inner) {
+      for (int i = 0; i < nmypage; i++)
+        bytes += ipage_inner[i].size();
+    }
+  }
+
+  if (respamiddle) {
+    bytes += memory->usage(ilist_middle,maxatom);
+    bytes += memory->usage(numneigh_middle,maxatom);
+    bytes += maxatom * sizeof(int *);
+    if (ipage_middle) {
+      for (int i = 0; i < nmypage; i++)
+        bytes += ipage_middle[i].size();
     }
   }
 
diff --git a/src/neigh_list.h b/src/neigh_list.h
index 4010a68857f3e9557c16224dc6e444cc652b8d79..d633ba839e0fc286856f54148a57f64f6559fc0c 100644
--- a/src/neigh_list.h
+++ b/src/neigh_list.h
@@ -34,9 +34,12 @@ class NeighList : protected Pointers {
   int occasional;                  // 0 if build every reneighbor, 1 if not
   int ghost;                       // 1 if list stores neighbors of ghosts
   int ssa;                         // 1 if list stores Shardlow data
-  int copy;                        // 1 if this list is (host) copied from another list
+  int history;                     // 1 if has neigh history (FixNeighHistory)
+  int respaouter;                  // 1 if list is a rRESPA outer list
+  int respamiddle;                 // 1 if there is also a rRESPA middle list
+  int respainner;                  // 1 if there is also a rRESPA inner list
+  int copy;                        // 1 if this list is copied from another list
   int copymode;                    // 1 if this is a Kokkos on-device copy
-  int dnum;                        // # of doubles per neighbor, 0 if none
 
   // data structs to store neighbor pairs I,J and associated values
 
@@ -45,13 +48,28 @@ class NeighList : protected Pointers {
   int *ilist;                      // local indices of I atoms
   int *numneigh;                   // # of J neighbors for each I atom
   int **firstneigh;                // ptr to 1st J int value of each I atom
-  double **firstdouble;            // ptr to 1st J double value of each I atom
   int maxatom;                     // size of allocated per-atom arrays
 
   int pgsize;                      // size of each page
   int oneatom;                     // max size for one atom
   MyPage<int> *ipage;              // pages of neighbor indices
-  MyPage<double> *dpage;           // pages of neighbor doubles, if dnum > 0
+
+  // data structs to store rRESPA neighbor pairs I,J and associated values
+
+  int inum_inner;                  // # of I atoms neighbors are stored for
+  int gnum_inner;                  // # of ghost atoms neighbors are stored for
+  int *ilist_inner;                // local indices of I atoms
+  int *numneigh_inner;             // # of J neighbors for each I atom
+  int **firstneigh_inner;          // ptr to 1st J int value of each I atom
+
+  int inum_middle;                 // # of I atoms neighbors are stored for
+  int gnum_middle;                 // # of ghost atoms neighbors are stored for
+  int *ilist_middle;               // local indices of I atoms
+  int *numneigh_middle;            // # of J neighbors for each I atom
+  int **firstneigh_middle;         // ptr to 1st J int value of each I atom
+
+  MyPage<int> *ipage_inner;        // pages of neighbor indices for inner
+  MyPage<int> *ipage_middle;       // pages of neighbor indices for middle
 
   // atom types to skip when building list
   // copied info from corresponding request into realloced vec/array
@@ -65,13 +83,6 @@ class NeighList : protected Pointers {
   NeighList *listskip;          // me = skip list, point to list I skip from
   NeighList *listfull;          // me = half list, point to full I derive from
 
-  NeighList *listhistory;       // list storing neigh history
-  class Fix *fix_history;       // fix that stores history info
-
-  int respamiddle;              // 1 if this respaouter has middle list
-  NeighList *listinner;         // me = respaouter, point to respainner
-  NeighList *listmiddle;        // me = respaouter, point to respamiddle
-
   class Fix *fix_bond;          // fix that stores bond info
 
   // Kokkos package
diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp
index 8d720e766cc6930c7d3f65ffa5e2582368d5a1a6..6325eec56683e892b5384f20ed2d9610dfdb19fa 100644
--- a/src/neigh_request.cpp
+++ b/src/neigh_request.cpp
@@ -42,7 +42,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp)
   // default is use newton_pair setting in force
   // default is no neighbors of ghosts
   // default is use cutoffs, not size of particles
-  // default is no additional neighbor history info
+  // default is no associated neighbor history info in FixNeighHistory
   // default is no one-sided sphere/surface interactions (when size = 1)
   // default is neighbors of atoms, not bonds
   // default is no multilevel rRESPA neighbors
@@ -68,8 +68,6 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp)
   cut = 0;
   cutoff = 0.0;
 
-  dnum = 0;
-
   // skip info, default is no skipping
   
   skip = 0;
@@ -88,11 +86,6 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp)
   copylist = -1;
   halffull = 0;
   halffulllist = -1;
-  history_partner = 0;
-  historylist = -1;
-  respaouterlist = -1;
-  respamiddlelist = -1;
-  respainnerlist = -1;
   unique = 0;
 
   // internal settings
@@ -158,8 +151,6 @@ int NeighRequest::identical(NeighRequest *other)
   if (copy != other->copy) same = 0;
   if (cutoff != other->cutoff) same = 0;
 
-  if (dnum != other->dnum) same = 0;
-
   if (skip != other->skip) same = 0;
   if (skip) same = same_skip(other);
 
@@ -226,8 +217,6 @@ void NeighRequest::copy_request(NeighRequest *other, int skipflag)
   cut = other->cut;
   cutoff = other->cutoff;
 
-  dnum = other->dnum;
-
   iskip = NULL;
   ijskip = NULL;
 
diff --git a/src/neigh_request.h b/src/neigh_request.h
index 70f7783a70c7af6de31346892f2356e55ce64f8c..16e6f1a8c020e822432269448052e43a8548dc9f 100644
--- a/src/neigh_request.h
+++ b/src/neigh_request.h
@@ -59,12 +59,12 @@ class NeighRequest : protected Pointers {
   
   int ghost;             // 1 if includes ghost atom neighbors
   int size;              // 1 if pair cutoff set by particle radius
-  int history;           // 1 if stores neighbor history info
+  int history;           // 1 if there is also neigh history (FixNeighHistory)
   int granonesided;      // 1 if one-sided granular list for 
                          //   sphere/surf interactions
-  int respainner;        // 1 if a rRESPA inner list
-  int respamiddle;       // 1 if a rRESPA middle list
-  int respaouter;        // 1 if a rRESPA outer list
+  int respainner;        // 1 if need a rRESPA inner list
+  int respamiddle;       // 1 if need a rRESPA middle list
+  int respaouter;        // 1 if need a rRESPA outer list
   int bond;              // 1 if store bond neighbors instead of atom neighs
   int omp;               // set by USER-OMP package
   int intel;             // set by USER-INTEL package
@@ -74,8 +74,6 @@ class NeighRequest : protected Pointers {
   int cut;               // 1 if use a non-standard cutoff length
   double cutoff;         // special cutoff distance for this list
 
-  int dnum;              // # of extra floating point values stored in list
-
   // flags set by pair hybrid
 
   int skip;              // 1 if this list skips atom types from another list
@@ -100,21 +98,9 @@ class NeighRequest : protected Pointers {
   int halffull;          // 1 if half list computed from another full list
   int halffulllist;      // index of full list to derive half from
 
-  int history_partner;   // 1 if this list partners with a history list
-  int historylist;       // index of the associated history list
-                         // for history = 1, index of the non-history partner
-
-  int respaouterlist;    // index of respaouter/middle/inner lists
-  int respamiddlelist;   // which this rREPSA list is associated with
-  int respainnerlist;    // each rRESPA style list points at the others
-
   int unique;            // 1 if this list requires its own
                          // NStencil, Nbin class - because of requestor cutoff
 
-  // pointer to FSH class, set by requestor class (not by Neighbor)
-
-  class FixShearHistory *fix_history;  // fix that stores per-atom history info
-
   // -----------------------------  
   // internal settings made by Neighbor class
   // -----------------------------  
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index a460be0065de2b042280618032ec5b0cb776f8b6..cc2e5d6d11a4c6e6df8f0799a5d3365fee7b8330 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -133,8 +133,6 @@ pairclass(NULL), pairnames(NULL), pairmasks(NULL)
   old_pgsize = pgsize;
   old_oneatom = oneatom;
 
-  zeroes = NULL;
-
   binclass = NULL;
   binnames = NULL;
   binmasks = NULL;
@@ -208,8 +206,6 @@ Neighbor::~Neighbor()
     if (old_requests[i]) delete old_requests[i];
   memory->sfree(old_requests);
 
-  delete [] zeroes;
-
   delete [] binclass;
   delete [] binnames;
   delete [] binmasks;
@@ -666,14 +662,12 @@ int Neighbor::init_pair()
   // purpose is to avoid duplicate or inefficient builds
   // may add new requests if a needed request to derive from does not exist
   // methods:
-  //   (1) other = point history and rRESPA lists at their partner lists
+  //   (1) unique = create unique lists if cutoff is explicitly set
   //   (2) skip = create any new non-skip lists needed by pair hybrid skip lists
   //   (3) granular = adjust parent and skip lists for granular onesided usage
   //   (4) h/f = pair up any matching half/full lists
   //   (5) copy = convert as many lists as possible to copy lists
   // order of morph methods matters:
-  //   (1) before (2), b/c (2) needs to know history partner pairings
-  //   (2) after (1), b/c (2) may also need to create new history lists
   //   (3) after (2), b/c it adjusts lists created by (2)
   //   (4) after (2) and (3), 
   //       b/c (2) may create new full lists, (3) may change them
@@ -681,7 +675,7 @@ int Neighbor::init_pair()
 
   int nrequest_original = nrequest;
 
-  morph_other();
+  morph_unique();
   morph_skip();
   morph_granular();     // this method can change flags set by requestor
   morph_halffull();
@@ -827,23 +821,13 @@ int Neighbor::init_pair()
   }
 
   // allocate initial pages for each list, except if copy flag set
-  // allocate dnum vector of zeroes if set
   
-  int dnummax = 0;
   for (i = 0; i < nlist; i++) {
     if (lists[i]->copy) continue;
     lists[i]->setup_pages(pgsize,oneatom);
-    dnummax = MAX(dnummax,lists[i]->dnum);
-  }
-  
-  if (dnummax) {
-    delete [] zeroes;
-    zeroes = new double[dnummax];
-    for (i = 0; i < dnummax; i++) zeroes[i] = 0.0;
   }
 
   // first-time allocation of per-atom data for lists that are built and store
-  // lists that are not built: granhistory, respa inner/middle (no neigh_pair)
   // lists that do not store: copy 
   // use atom->nmax for both grow() args
   //   i.e. grow first time to expanded size to avoid future reallocs
@@ -923,40 +907,16 @@ int Neighbor::init_pair()
 
 /* ----------------------------------------------------------------------
    scan NeighRequests to set additional flags
-   only for history, respaouter, custom cutoff lists
+   only for custom cutoff lists
 ------------------------------------------------------------------------- */
 
-void Neighbor::morph_other()
+void Neighbor::morph_unique()
 {
   NeighRequest *irq;
   
   for (int i = 0; i < nrequest; i++) {
     irq = requests[i];
 
-    // if history, point this list and partner list at each other
-
-    if (irq->history) {
-      irq->historylist = i-1;
-      requests[i-1]->history_partner = 1;
-      requests[i-1]->historylist = i;
-    }
-
-    // if respaouter, point all associated rRESPA lists at each other
-
-    if (irq->respaouter) {
-      if (requests[i-1]->respainner) {
-        irq->respainnerlist = i-1;
-        requests[i-1]->respaouterlist = i;
-      } else {
-        irq->respamiddlelist = i-1;
-        requests[i-1]->respaouterlist = i;
-        requests[i-1]->respainnerlist = i-1;
-        irq->respainnerlist = i-2;
-        requests[i-2]->respaouterlist = i;
-        requests[i-2]->respamiddlelist = i-1;
-      }
-    }
-    
     // if cut flag set by requestor, set unique flag
     // this forces Pair,Stencil,Bin styles to be instantiated separately
 
@@ -987,8 +947,6 @@ void Neighbor::morph_skip()
     // halffull list and its full parent may both skip,
     //   but are checked to insure matching skip info
 
-    if (irq->history) continue;
-    if (irq->respainner || irq->respamiddle) continue;
     if (irq->halffull) continue;
     if (irq->copy) continue;
 
@@ -1021,12 +979,11 @@ void Neighbor::morph_skip()
       //   else 2 lists do not store same pairs
       //   or their data structures are different
       // this includes custom cutoff set by requestor
-      // no need to check respaouter b/c it stores same pairs
-      // no need to check dnum b/c only set for history
       // NOTE: need check for 2 Kokkos flags?
 
       if (irq->ghost != jrq->ghost) continue;
       if (irq->size != jrq->size) continue;
+      if (irq->history != jrq->history) continue;
       if (irq->bond != jrq->bond) continue;
       if (irq->omp != jrq->omp) continue;
       if (irq->intel != jrq->intel) continue;
@@ -1045,8 +1002,8 @@ void Neighbor::morph_skip()
     // else create a new identical list except non-skip
     // for new list, set neigh = 1, skip = 0, no skip vec/array,
     //   copy unique flag (since copy_request() will not do it)
-    // note: parents of skip lists do not have associated history list
-    //   b/c child skip lists store their own history info
+    // note: parents of skip lists do not have associated history
+    //   b/c child skip lists have the associated history
 
     if (j < nrequest) irq->skiplist = j;
     else {
@@ -1107,7 +1064,6 @@ void Neighbor::morph_granular()
       if (onesided == 2) break;
     }
 
-
     // if onesided = 2, parent has children with both granonesided = 0/1
     // force parent newton off (newton = 2) to enable onesided skip by child
     // set parent granonesided = 0, so it stores all neighs in usual manner
@@ -1159,8 +1115,6 @@ void Neighbor::morph_halffull()
     // these lists are created other ways, no need for halffull
     // do want to process skip lists
 
-    if (irq->history) continue;
-    if (irq->respainner || irq->respamiddle) continue;
     if (irq->copy) continue;
 
     // check all other lists
@@ -1179,11 +1133,10 @@ void Neighbor::morph_halffull()
       //   else 2 lists do not store same pairs
       //   or their data structures are different
       // this includes custom cutoff set by requestor
-      // no need to check respaouter b/c it stores same pairs
-      // no need to check dnum b/c only set for history
 
       if (irq->ghost != jrq->ghost) continue;
       if (irq->size != jrq->size) continue;
+      if (irq->history != jrq->history) continue;
       if (irq->bond != jrq->bond) continue;
       if (irq->omp != jrq->omp) continue;
       if (irq->intel != jrq->intel) continue;
@@ -1230,12 +1183,6 @@ void Neighbor::morph_copy()
 
     if (irq->copy) continue;
 
-    // these lists are created other ways, no need to copy
-    // skip lists are eligible to become a copy list
-
-    if (irq->history) continue;
-    if (irq->respainner || irq->respamiddle) continue;
-    
     // check all other lists
 
     for (j = 0; j < nrequest; j++) {
@@ -1272,9 +1219,9 @@ void Neighbor::morph_copy()
 
       if (irq->ghost && !jrq->ghost) continue;
 
-      // do not copy from a history list or a respa middle/inner list
+      // do not copy from a list with respa middle/inner
+      // b/c its outer list will not be complete
 
-      if (jrq->history) continue;
       if (jrq->respamiddle) continue;
       if (jrq->respainner) continue;
 
@@ -1282,12 +1229,11 @@ void Neighbor::morph_copy()
       //   else 2 lists do not store same pairs
       //   or their data structures are different
       // this includes custom cutoff set by requestor
-      // no need to check respaouter b/c it stores same pairs
       // no need to check omp b/c it stores same pairs
-      // no need to check dnum b/c only set for history
       // NOTE: need check for 2 Kokkos flags?
 
       if (irq->size != jrq->size) continue;
+      if (irq->history != jrq->history) continue;
       if (irq->bond != jrq->bond) continue;
       if (irq->intel != jrq->intel) continue;
       if (irq->kokkos_host != jrq->kokkos_host) continue;
@@ -1535,9 +1481,7 @@ void Neighbor::print_pairwise_info()
 
         // order these to get single output of most relevant
 
-        if (rq->history) 
-          fprintf(out,", history for (%d)",rq->historylist+1);
-        else if (rq->copy)
+        if (rq->copy)
           fprintf(out,", copy from (%d)",rq->copylist+1);
         else if (rq->halffull) 
           fprintf(out,", half/full from (%d)",rq->halffulllist+1);
@@ -1562,9 +1506,8 @@ void Neighbor::print_pairwise_info()
         if (rq->size) fprintf(out,", size");
         if (rq->history) fprintf(out,", history");
         if (rq->granonesided) fprintf(out,", onesided");
-        if (rq->respainner) fprintf(out,", respa outer");
-        if (rq->respamiddle) fprintf(out,", respa middle");
-        if (rq->respaouter) fprintf(out,", respa inner");
+        if (rq->respamiddle) fprintf(out,", respa outer/middle/inner");
+        else if (rq->respainner) fprintf(out,", respa outer/inner");
         if (rq->bond) fprintf(out,", bond");
         if (rq->omp) fprintf(out,", omp");
         if (rq->intel) fprintf(out,", intel");
@@ -1659,8 +1602,6 @@ int Neighbor::choose_bin(NeighRequest *rq)
 
   if (style == NSQ) return 0;
   if (rq->skip || rq->copy || rq->halffull) return 0;
-  if (rq->history) return 0;
-  if (rq->respainner || rq->respamiddle) return 0;
 
   // use request settings to match exactly one NBin class mask
   // checks are bitwise using NeighConst bit masks
@@ -1701,8 +1642,6 @@ int Neighbor::choose_stencil(NeighRequest *rq)
 
   if (style == NSQ) return 0;
   if (rq->skip || rq->copy || rq->halffull) return 0;
-  if (rq->history) return 0;
-  if (rq->respainner || rq->respamiddle) return 0;
 
   // convert newton request to newtflag = on or off
 
@@ -1793,11 +1732,6 @@ int Neighbor::choose_stencil(NeighRequest *rq)
 
 int Neighbor::choose_pair(NeighRequest *rq)
 {
-  // no neighbor list build performed
-
-  if (rq->history) return 0;
-  if (rq->respainner || rq->respamiddle) return 0;
-
   // error check for includegroup with ghost neighbor request
 
   if (includegroup && rq->ghost)
diff --git a/src/neighbor.h b/src/neighbor.h
index 64bced2293daec3bf040092a5097701ed0de1f8a..9244bc575d9b355780996e98e6fb4b63f9aa0651 100644
--- a/src/neighbor.h
+++ b/src/neighbor.h
@@ -54,7 +54,6 @@ class Neighbor : protected Pointers {
 
   double *bboxlo,*bboxhi;          // ptrs to full domain bounding box
                                    // different for orthog vs triclinic
-  double *zeroes;                  // vector of zeroes for shear history init
 
   // exclusion info, used by NeighPair
 
@@ -205,7 +204,7 @@ class Neighbor : protected Pointers {
   int init_pair();
   virtual void init_topology();
 
-  void morph_other();
+  void morph_unique();
   void morph_skip();
   void morph_granular();
   void morph_halffull();
diff --git a/src/npair.cpp b/src/npair.cpp
index 9fbb4d219db3b6f8e357b43f1e70cc3b7ab29cea..dd3a73926e0d2b363da42231ad245a13b1386280 100644
--- a/src/npair.cpp
+++ b/src/npair.cpp
@@ -66,7 +66,6 @@ void NPair::copy_neighbor_info()
   cut_inner_sq = neighbor->cut_inner_sq;
   cut_middle_sq = neighbor->cut_middle_sq;
   cut_middle_inside_sq = neighbor->cut_middle_inside_sq;
-  zeroes = neighbor->zeroes;
   bboxlo = neighbor->bboxlo;
   bboxhi = neighbor->bboxhi;
 
diff --git a/src/npair.h b/src/npair.h
index 6941b86164c674a81ecfd282af718fafe6aa646a..289a1348d92b7747f97196e7c7472fbace433c51 100644
--- a/src/npair.h
+++ b/src/npair.h
@@ -47,7 +47,6 @@ class NPair : protected Pointers {
   double cut_inner_sq;
   double cut_middle_sq;
   double cut_middle_inside_sq;
-  double *zeroes;
   double *bboxlo,*bboxhi;
 
   // exclusion data from Neighbor class
diff --git a/src/npair_copy.cpp b/src/npair_copy.cpp
index 1799d48fede85194fa9fc16989395b175d1958df..9426d22ed3595dd2157d20adbf56265d3f3bb42b 100644
--- a/src/npair_copy.cpp
+++ b/src/npair_copy.cpp
@@ -40,7 +40,5 @@ void NPairCopy::build(NeighList *list)
   list->ilist = listcopy->ilist;
   list->numneigh = listcopy->numneigh;
   list->firstneigh = listcopy->firstneigh;
-  list->firstdouble = listcopy->firstdouble;
   list->ipage = listcopy->ipage;
-  list->dpage = listcopy->dpage;
 }
diff --git a/src/npair_half_respa_bin_newtoff.cpp b/src/npair_half_respa_bin_newtoff.cpp
index 11246b4af8746f7b008e8872cebcd64cc71a66bd..0145771f4aafa11a3083c70466bb9476ad771b29 100644
--- a/src/npair_half_respa_bin_newtoff.cpp
+++ b/src/npair_half_respa_bin_newtoff.cpp
@@ -63,22 +63,19 @@ void NPairHalfRespaBinNewtoff::build(NeighList *list)
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
 
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
   }
 
   int inum = 0;
@@ -185,6 +182,6 @@ void NPairHalfRespaBinNewtoff::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_half_respa_bin_newton.cpp b/src/npair_half_respa_bin_newton.cpp
index db766780360b2fc382eec5a87e9a1c8d5722da9d..72a613204d889c8d7c5873d424aa8d9491b69f48 100644
--- a/src/npair_half_respa_bin_newton.cpp
+++ b/src/npair_half_respa_bin_newton.cpp
@@ -62,22 +62,19 @@ void NPairHalfRespaBinNewton::build(NeighList *list)
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
 
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
   }
 
   int inum = 0;
@@ -231,6 +228,6 @@ void NPairHalfRespaBinNewton::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp
index 4ec6685e1d4580024458891ae3da7a436b79753b..add1cf6e5cb50b36fed9dc0d30b8fff7e554ecbe 100644
--- a/src/npair_half_respa_bin_newton_tri.cpp
+++ b/src/npair_half_respa_bin_newton_tri.cpp
@@ -63,22 +63,19 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list)
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
 
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
   }
 
   int inum = 0;
@@ -193,6 +190,6 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_half_respa_nsq_newtoff.cpp b/src/npair_half_respa_nsq_newtoff.cpp
index 1bb2034384f5bd72915f032f747748923feee0c2..c0e932f0aeededae43300ff794cd5508fc8eb2b5 100644
--- a/src/npair_half_respa_nsq_newtoff.cpp
+++ b/src/npair_half_respa_nsq_newtoff.cpp
@@ -67,22 +67,19 @@ void NPairHalfRespaNsqNewtoff::build(NeighList *list)
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
 
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
   }
 
   int inum = 0;
@@ -180,6 +177,6 @@ void NPairHalfRespaNsqNewtoff::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_half_respa_nsq_newton.cpp b/src/npair_half_respa_nsq_newton.cpp
index 9aacc702cc854caab4faded04a6090e7ae7e8c7a..f7d161896d47bc520278b68fd0674ef5fa2eae82 100644
--- a/src/npair_half_respa_nsq_newton.cpp
+++ b/src/npair_half_respa_nsq_newton.cpp
@@ -69,22 +69,19 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
 
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
   }
 
   int inum = 0;
@@ -200,6 +197,6 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_half_size_bin_newtoff.cpp b/src/npair_half_size_bin_newtoff.cpp
index 571b2484ea425aee53439974d08eb974d0cf2604..cf608b5d595ab5d27cbc287b64b78a77efdb317a 100644
--- a/src/npair_half_size_bin_newtoff.cpp
+++ b/src/npair_half_size_bin_newtoff.cpp
@@ -17,9 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -32,7 +29,6 @@ NPairHalfSizeBinNewtoff::NPairHalfSizeBinNewtoff(LAMMPS *lmp) : NPair(lmp) {}
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with partial Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    each owned atom i checks own bin and surrounding bins in non-Newton stencil
    pair stored once if i,j are both owned and i < j
    pair stored by me if j is ghost (also stored by proc owning j)
@@ -40,20 +36,10 @@ NPairHalfSizeBinNewtoff::NPairHalfSizeBinNewtoff(LAMMPS *lmp) : NPair(lmp) {}
 
 void NPairHalfSizeBinNewtoff::build(NeighList *list)
 {
-  int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
+  int i,j,k,m,n,nn,ibin;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -64,42 +50,20 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list)
   int nlocal = atom->nlocal;
   if (includegroup) nlocal = atom->nfirst;
 
+  int history = list->history;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
+  int mask_history = 3 << SBBITS;
 
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -116,38 +80,19 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list)
       for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
         if (j <= i) continue;
         if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
         rsq = delx*delx + dely*dely + delz*delz;
         radsum = radi + radius[j];
         cutsq = (radsum+skin) * (radsum+skin);
 
         if (rsq <= cutsq) {
-          neighptr[n] = j;
-
-          if (fix_history) {
-            if (rsq < radsum*radsum) {
-              for (m = 0; m < npartner[i]; m++)
-                if (partner[i][m] == tag[j]) break;
-              if (m < npartner[i]) {
-                touchptr[n] = 1;
-                memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-                nn += dnum;
-              } else {
-                touchptr[n] = 0;
-                memcpy(&shearptr[nn],zeroes,dnumbytes);
-                nn += dnum;
-              }
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          }
-
-          n++;
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
         }
       }
     }
@@ -158,13 +103,6 @@ void NPairHalfSizeBinNewtoff::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_half_size_bin_newton.cpp b/src/npair_half_size_bin_newton.cpp
index 4f4ecccb165d4289108abcbade6ff14d51ae9c8b..662bf91d6e9606d4c00a1a42ce130c34beea17a6 100644
--- a/src/npair_half_size_bin_newton.cpp
+++ b/src/npair_half_size_bin_newton.cpp
@@ -17,9 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -32,27 +29,16 @@ NPairHalfSizeBinNewton::NPairHalfSizeBinNewton(LAMMPS *lmp) : NPair(lmp) {}
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with full Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    each owned atom i checks its own bin and other bins in Newton stencil
    every pair stored exactly once by some processor
 ------------------------------------------------------------------------- */
 
 void NPairHalfSizeBinNewton::build(NeighList *list)
 {
-  int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
+  int i,j,k,m,n,nn,ibin;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -63,42 +49,20 @@ void NPairHalfSizeBinNewton::build(NeighList *list)
   int nlocal = atom->nlocal;
   if (includegroup) nlocal = atom->nfirst;
 
+  int history = list->history;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
+  int mask_history = 3 << SBBITS;
 
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -128,29 +92,10 @@ void NPairHalfSizeBinNewton::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -169,29 +114,10 @@ void NPairHalfSizeBinNewton::build(NeighList *list)
         cutsq = (radsum+skin) * (radsum+skin);
 
         if (rsq <= cutsq) {
-          neighptr[n] = j;
-
-          if (fix_history) {
-            if (rsq < radsum*radsum) {
-              for (m = 0; m < npartner[i]; m++)
-                if (partner[i][m] == tag[j]) break;
-              if (m < npartner[i]) {
-                touchptr[n] = 1;
-                memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-                nn += dnum;
-              } else {
-                touchptr[n] = 0;
-                memcpy(&shearptr[nn],zeroes,dnumbytes);
-                nn += dnum;
-              }
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          }
-
-          n++;
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
         }
       }
     }
@@ -202,13 +128,6 @@ void NPairHalfSizeBinNewton::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp
index 559eb09a7a5f731e4a1398f36644bb0d8a03f7f5..e70c0722804dad9ef06858a7d2f37d77aa33dd62 100644
--- a/src/npair_half_size_bin_newton_tri.cpp
+++ b/src/npair_half_size_bin_newton_tri.cpp
@@ -17,9 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -33,27 +30,16 @@ NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) :
 /* ----------------------------------------------------------------------
    size particles
    binned neighbor list construction with Newton's 3rd law for triclinic
-   shear history must be accounted for when a neighbor pair is added
    each owned atom i checks its own bin and other bins in triclinic stencil
    every pair stored exactly once by some processor
 ------------------------------------------------------------------------- */
 
 void NPairHalfSizeBinNewtonTri::build(NeighList *list)
 {
-  int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
+  int i,j,k,m,n,nn,ibin;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -64,42 +50,20 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list)
   int nlocal = atom->nlocal;
   if (includegroup) nlocal = atom->nfirst;
 
+  int history = list->history;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
+  int mask_history = 3 << SBBITS;
 
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -134,29 +98,10 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list)
         cutsq = (radsum+skin) * (radsum+skin);
 
         if (rsq <= cutsq) {
-          neighptr[n++] = j;
-
-          if (fix_history) {
-            if (rsq < radsum*radsum) {
-              for (m = 0; m < npartner[i]; m++)
-                if (partner[i][m] == tag[j]) break;
-              if (m < npartner[i]) {
-                touchptr[n] = 1;
-                memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-                nn += dnum;
-              } else {
-                touchptr[n] = 0;
-                memcpy(&shearptr[nn],zeroes,dnumbytes);
-                nn += dnum;
-              }
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          }
-
-          n++;
+          if (history && rsq < radsum*radsum)
+            neighptr[n++] = j ^ mask_history;
+          else
+            neighptr[n++] = j;
         }
       }
     }
@@ -167,13 +112,6 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_half_size_nsq_newtoff.cpp b/src/npair_half_size_nsq_newtoff.cpp
index 56630a9dc803a4cd5053eea8676b5044eae5cdca..e6f5cba657f79ec1318e4a15b2050e34d0c480e5 100644
--- a/src/npair_half_size_nsq_newtoff.cpp
+++ b/src/npair_half_size_nsq_newtoff.cpp
@@ -18,9 +18,6 @@
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -33,27 +30,16 @@ NPairHalfSizeNsqNewtoff::NPairHalfSizeNsqNewtoff(LAMMPS *lmp) : NPair(lmp) {}
 /* ----------------------------------------------------------------------
    size particles
    N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    pair added to list if atoms i and j are both owned and i < j
    pair added if j is ghost (also stored by proc owning j)
 ------------------------------------------------------------------------- */
 
 void NPairHalfSizeNsqNewtoff::build(NeighList *list)
 {
-  int i,j,m,n,nn,bitmask,dnum,dnumbytes;
+  int i,j,m,n,nn,bitmask;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -68,42 +54,20 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list)
     bitmask = group->bitmask[includegroup];
   }
 
+  int history = list->history;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nall;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
+  int mask_history = 3 << SBBITS;
 
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -124,29 +88,10 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -156,13 +101,6 @@ void NPairHalfSizeNsqNewtoff::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_half_size_nsq_newton.cpp b/src/npair_half_size_nsq_newton.cpp
index 177685b9fc21a5107ee44d2449b083b9ac2558a1..78811170cbd50450234a70b1f580ab2a91df43fc 100644
--- a/src/npair_half_size_nsq_newton.cpp
+++ b/src/npair_half_size_nsq_newton.cpp
@@ -18,9 +18,6 @@
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -33,7 +30,6 @@ NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {}
 /* ----------------------------------------------------------------------
    size particles
    N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
    pair added to list if atoms i and j are both owned and i < j
    if j is ghost only me or other proc adds pair
    decision based on itag,jtag tests
@@ -41,20 +37,10 @@ NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {}
 
 void NPairHalfSizeNsqNewton::build(NeighList *list)
 {
-  int i,j,m,n,nn,itag,jtag,bitmask,dnum,dnumbytes;
+  int i,j,m,n,nn,itag,jtag,bitmask;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,cutsq;
-  int *neighptr,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr;
 
   double **x = atom->x;
   double *radius = atom->radius;
@@ -69,42 +55,20 @@ void NPairHalfSizeNsqNewton::build(NeighList *list)
     bitmask = group->bitmask[includegroup];
   }
 
+  int history = list->history;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
   MyPage<int> *ipage = list->ipage;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nall;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
+  int mask_history = 3 << SBBITS;
 
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   for (i = 0; i < nlocal; i++) {
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     itag = tag[i];
     xtmp = x[i][0];
@@ -142,29 +106,10 @@ void NPairHalfSizeNsqNewton::build(NeighList *list)
       cutsq = (radsum+skin) * (radsum+skin);
 
       if (rsq <= cutsq) {
-        neighptr[n] = j;
-
-        if (fix_history) {
-          if (rsq < radsum*radsum) {
-            for (m = 0; m < npartner[i]; m++)
-              if (partner[i][m] == tag[j]) break;
-            if (m < npartner[i]) {
-              touchptr[n] = 1;
-              memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-              nn += dnum;
-            } else {
-              touchptr[n] = 0;
-              memcpy(&shearptr[nn],zeroes,dnumbytes);
-              nn += dnum;
-            }
-          } else {
-            touchptr[n] = 0;
-            memcpy(&shearptr[nn],zeroes,dnumbytes);
-            nn += dnum;
-          }
-        }
-
-        n++;
+        if (history && rsq < radsum*radsum)
+          neighptr[n++] = j ^ mask_history;
+        else
+          neighptr[n++] = j;
       }
     }
 
@@ -174,13 +119,6 @@ void NPairHalfSizeNsqNewton::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_skip_respa.cpp b/src/npair_skip_respa.cpp
index 31420b32d1e69ad80025e231d0a76064f0b5bc53..1d4eda53549f9ec180dd7e9d2c66c0999a2fd88f 100644
--- a/src/npair_skip_respa.cpp
+++ b/src/npair_skip_respa.cpp
@@ -53,28 +53,24 @@ void NPairSkipRespa::build(NeighList *list)
   int *iskip = list->iskip;
   int **ijskip = list->ijskip;
 
-  NeighList *listinner = list->listinner;
-  int *ilist_inner = listinner->ilist;
-  int *numneigh_inner = listinner->numneigh;
-  int **firstneigh_inner = listinner->firstneigh;
-  MyPage<int> *ipage_inner = listinner->ipage;
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
+  int *numneigh_inner_skip = list->listskip->numneigh_inner;
+  int **firstneigh_inner_skip = list->listskip->firstneigh_inner;
 
-  int *numneigh_inner_skip = list->listskip->listinner->numneigh;
-  int **firstneigh_inner_skip = list->listskip->listinner->firstneigh;
-
-  NeighList *listmiddle;
   int *ilist_middle,*numneigh_middle,**firstneigh_middle;
   MyPage<int> *ipage_middle;
   int *numneigh_middle_skip,**firstneigh_middle_skip;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
-    listmiddle = list->listmiddle;
-    ilist_middle = listmiddle->ilist;
-    numneigh_middle = listmiddle->numneigh;
-    firstneigh_middle = listmiddle->firstneigh;
-    ipage_middle = listmiddle->ipage;
-    numneigh_middle_skip = list->listskip->listmiddle->numneigh;
-    firstneigh_middle_skip = list->listskip->listmiddle->firstneigh;
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
+    numneigh_middle_skip = list->listskip->numneigh_middle;
+    firstneigh_middle_skip = list->listskip->firstneigh_middle;
   }
 
   int inum = 0;
@@ -164,6 +160,6 @@ void NPairSkipRespa::build(NeighList *list)
   }
 
   list->inum = inum;
-  listinner->inum = inum;
-  if (respamiddle) listmiddle->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
 }
diff --git a/src/npair_skip_size.cpp b/src/npair_skip_size.cpp
index e8d19dedcaf46b6461e9fd989d0596af89c6624c..075387f5b043cc857ddf2a234ebfc6a4f6c5ec15 100644
--- a/src/npair_skip_size.cpp
+++ b/src/npair_skip_size.cpp
@@ -17,9 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -32,24 +29,13 @@ NPairSkipSize::NPairSkipSize(LAMMPS *lmp) : NPair(lmp) {}
 /* ----------------------------------------------------------------------
    build skip list for subset of types from parent list
    iskip and ijskip flag which atom types and type pairs to skip
-   if list requests it, preserve shear history via fix shear/history
 ------------------------------------------------------------------------- */
 
 void NPairSkipSize::build(NeighList *list)
 {
   int i,j,ii,jj,m,n,nn,itype,jnum,joriginal,dnum,dnumbytes;
   tagint jtag;
-  int *neighptr,*jlist,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr,*jlist;
 
   tagint *tag = atom->tag;
   int *type = atom->type;
@@ -68,28 +54,8 @@ void NPairSkipSize::build(NeighList *list)
   int *iskip = list->iskip;
   int **ijskip = list->ijskip;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
-
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   // loop over atoms in other list
   // skip I atom entirely if iskip is set for type[I]
@@ -102,13 +68,8 @@ void NPairSkipSize::build(NeighList *list)
 
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
-    // loop over parent non-skip size list and optionally its history info
+    // loop over parent non-skip size list
 
     jlist = firstneigh_skip[i];
     jnum = numneigh_skip[i];
@@ -117,29 +78,7 @@ void NPairSkipSize::build(NeighList *list)
       joriginal = jlist[jj];
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
-      neighptr[n] = joriginal;
-
-      // no numeric test for current touch
-      // just use FSH partner list to infer it
-      // would require distance calculation for spheres
-      // more complex calculation for surfs
-
-      if (fix_history) {
-        jtag = tag[j];
-        for (m = 0; m < npartner[i]; m++)
-          if (partner[i][m] == jtag) break;
-        if (m < npartner[i]) {
-          touchptr[n] = 1;
-          memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-          nn += dnum;
-        } else {
-          touchptr[n] = 0;
-          memcpy(&shearptr[nn],zeroes,dnumbytes);
-          nn += dnum;
-        }
-      }
-
-      n++;
+      neighptr[n++] = joriginal;
     }
 
     ilist[inum++] = i;
@@ -148,13 +87,6 @@ void NPairSkipSize::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_skip_size_off2on.cpp b/src/npair_skip_size_off2on.cpp
index da9dd57047444fd09e85ecfcb70335438b07aa60..92eae285d0a37d03b55b8bb09ef95d2e2f2e53d0 100644
--- a/src/npair_skip_size_off2on.cpp
+++ b/src/npair_skip_size_off2on.cpp
@@ -17,9 +17,6 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -33,24 +30,13 @@ NPairSkipSizeOff2on::NPairSkipSizeOff2on(LAMMPS *lmp) : NPair(lmp) {}
    build skip list for subset of types from parent list
    iskip and ijskip flag which atom types and type pairs to skip
    parent non-skip list used newton off, this skip list is newton on
-   if list requests it, preserve shear history via fix shear/history
 ------------------------------------------------------------------------- */
 
 void NPairSkipSizeOff2on::build(NeighList *list)
 {
   int i,j,ii,jj,m,n,nn,itype,jnum,joriginal,dnum,dnumbytes;
   tagint itag,jtag;
-  int *neighptr,*jlist,*touchptr;
-  double *shearptr;
-
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
+  int *neighptr,*jlist;
 
   tagint *tag = atom->tag;
   int *type = atom->type;
@@ -69,28 +55,8 @@ void NPairSkipSizeOff2on::build(NeighList *list)
   int *iskip = list->iskip;
   int **ijskip = list->ijskip;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
-
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   // loop over atoms in other list
   // skip I atom entirely if iskip is set for type[I]
@@ -104,11 +70,6 @@ void NPairSkipSizeOff2on::build(NeighList *list)
 
     n = 0;
     neighptr = ipage->vget();
-    if (fix_history) {
-      nn = 0;
-      touchptr = ipage_touch->vget();
-      shearptr = dpage_shear->vget();
-    }
 
     // loop over parent non-skip size list and optionally its history info
 
@@ -125,28 +86,7 @@ void NPairSkipSizeOff2on::build(NeighList *list)
       jtag = tag[j];
       if (j >= nlocal && jtag < itag) continue;
 
-      neighptr[n] = joriginal;
-
-      // no numeric test for current touch
-      // just use FSH partner list to infer it
-      // would require distance calculation for spheres
-      // more complex calculation for surfs
-
-      if (fix_history) {
-        for (m = 0; m < npartner[i]; m++)
-          if (partner[i][m] == jtag) break;
-        if (m < npartner[i]) {
-          touchptr[n] = 1;
-          memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
-          nn += dnum;
-        } else {
-          touchptr[n] = 0;
-          memcpy(&shearptr[nn],zeroes,dnumbytes);
-          nn += dnum;
-        }
-      }
-
-      n++;
+      neighptr[n++] = joriginal;
     }
 
     ilist[inum++] = i;
@@ -155,13 +95,6 @@ void NPairSkipSizeOff2on::build(NeighList *list)
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (fix_history) {
-      firsttouch[i] = touchptr;
-      firstshear[i] = shearptr;
-      ipage_touch->vgot(n);
-      dpage_shear->vgot(nn);
-    }
   }
 
   list->inum = inum;
diff --git a/src/npair_skip_size_off2on_oneside.cpp b/src/npair_skip_size_off2on_oneside.cpp
index 7377feec5bbefe495242deadff125c48b3fa404e..f2fca7b1284389c925650d6678cc0d7d14d87fc4 100644
--- a/src/npair_skip_size_off2on_oneside.cpp
+++ b/src/npair_skip_size_off2on_oneside.cpp
@@ -17,9 +17,7 @@
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
 #include "domain.h"
-#include "fix_shear_history.h"
 #include "my_page.h"
 #include "error.h"
 
@@ -35,7 +33,6 @@ NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) :
    iskip and ijskip flag which atom types and type pairs to skip
    parent non-skip list used newton off and was not onesided,
      this skip list is newton on and onesided
-   if list requests it, preserve shear history via fix shear/history
 ------------------------------------------------------------------------- */
 
 void NPairSkipSizeOff2onOneside::build(NeighList *list)
@@ -44,15 +41,6 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
   tagint jtag;
   int *surf,*jlist;
 
-  int *npartner;
-  tagint **partner;
-  double **shearpartner;
-  int **firsttouch;
-  double **firstshear;
-  MyPage<int> *ipage_touch;
-  MyPage<double> *dpage_shear;
-  NeighList *listhistory;
-
   tagint *tag = atom->tag;
   int *type = atom->type;
   int nlocal = atom->nlocal;
@@ -73,28 +61,8 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
   if (domain->dimension == 2) surf = atom->line;
   else surf = atom->tri;
 
-  FixShearHistory *fix_history = (FixShearHistory *) list->fix_history;
-  if (fix_history) {
-    fix_history->nlocal_neigh = nlocal;
-    fix_history->nall_neigh = nlocal + atom->nghost;
-    npartner = fix_history->npartner;
-    partner = fix_history->partner;
-    shearpartner = fix_history->shearpartner;
-    listhistory = list->listhistory;
-    firsttouch = listhistory->firstneigh;
-    firstshear = listhistory->firstdouble;
-    ipage_touch = listhistory->ipage;
-    dpage_shear = listhistory->dpage;
-    dnum = listhistory->dnum;
-    dnumbytes = dnum * sizeof(double);
-  }
-
   int inum = 0;
   ipage->reset();
-  if (fix_history) {
-    ipage_touch->reset();
-    dpage_shear->reset();
-  }
 
   // two loops over parent list required, one to count, one to store
   // because onesided constraint means pair I,J may be stored with I or J
@@ -139,7 +107,7 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
     }
   }
 
-  // allocate all per-atom neigh list chunks, including history
+  // allocate all per-atom neigh list chunks
 
   for (i = 0; i < nlocal; i++) {
     if (numneigh[i] == 0) continue;
@@ -147,10 +115,6 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
     firstneigh[i] = ipage->get(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    if (fix_history) {
-      firsttouch[i] = ipage_touch->get(n);
-      firstshear[i] = dpage_shear->get(dnum*n);
-    }
   }
 
   // second loop over atoms in other list to store neighbors
@@ -189,32 +153,11 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
       // OK, b/c there is no special list flagging for surfs
 
       firstneigh[i][numneigh[i]] = j;
-
-      // no numeric test for current touch
-      // just use FSH partner list to infer it
-      // would require complex calculation for surfs
-
-      if (fix_history) {
-        jtag = tag[j];
-        n = numneigh[i];
-        nn = dnum*n;
-        for (m = 0; m < npartner[i]; m++)
-          if (partner[i][m] == jtag) break;
-        if (m < npartner[i]) {
-          firsttouch[i][n] = 1;
-          memcpy(&firstshear[i][nn],&shearpartner[i][dnum*m],dnumbytes);
-        } else {
-          firsttouch[i][n] = 0;
-          memcpy(&firstshear[i][nn],zeroes,dnumbytes);
-        }
-      }
-
       numneigh[i]++;
       if (flip) i = j;
     }
 
     // only add atom I to ilist if it has neighbors
-    // fix shear/history allows for this in pre_exchange_onesided()
 
     if (numneigh[i]) ilist[inum++] = i;
   }
diff --git a/src/output.cpp b/src/output.cpp
index ce7fcb7cca3a353bb115c2f69d036014c9c70003..11c6fa073e8c05c38056efbdc1765e5f6f8da999 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -827,9 +827,9 @@ void Output::create_restart(int narg, char **arg)
    sum and print memory usage
    result is only memory on proc 0, not averaged across procs
 ------------------------------------------------------------------------- */
+
 void Output::memory_usage()
 {
-
   bigint bytes = 0;
   bytes += atom->memory_usage();
   bytes += neighbor->memory_usage();
@@ -844,9 +844,9 @@ void Output::memory_usage()
   MPI_Reduce(&mbytes,&mbavg,1,MPI_DOUBLE,MPI_SUM,0,world);
   MPI_Reduce(&mbytes,&mbmin,1,MPI_DOUBLE,MPI_MIN,0,world);
   MPI_Reduce(&mbytes,&mbmax,1,MPI_DOUBLE,MPI_MAX,0,world);
-  mbavg /= comm->nprocs;
 
   if (comm->me == 0) {
+    mbavg /= comm->nprocs;
     if (screen)
       fprintf(screen,"Per MPI rank memory allocation (min/avg/max) = "
               "%.4g | %.4g | %.4g Mbytes\n",mbmin,mbavg,mbmax);
diff --git a/src/pair.h b/src/pair.h
index 0f7b0f85b62dbf6684304c78fe2e257c2bf659f4..eb71e8822474aed87e024988e27b669196385cd9 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -92,10 +92,6 @@ class Pair : protected Pointers {
   class NeighList *list;         // standard neighbor list used by most pairs
   class NeighList *listhalf;     // half list used by some pairs
   class NeighList *listfull;     // full list used by some pairs
-  class NeighList *listhistory;  // neighbor history list used by some pairs
-  class NeighList *listinner;    // rRESPA lists used by some pairs
-  class NeighList *listmiddle;
-  class NeighList *listouter;
 
   int allocated;                 // 0/1 = whether arrays are allocated
                                  //       public so external driver can check
diff --git a/src/pair_lj96_cut.cpp b/src/pair_lj96_cut.cpp
index 83fc5bcdda4218cf68b8ef3ded7039bd8d9e2bec..842b918fe154ad8db4865e2b14cd44d8141c7ff3 100644
--- a/src/pair_lj96_cut.cpp
+++ b/src/pair_lj96_cut.cpp
@@ -157,10 +157,10 @@ void PairLJ96Cut::compute_inner()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -231,10 +231,10 @@ void PairLJ96Cut::compute_middle()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -318,10 +318,10 @@ void PairLJ96Cut::compute_outer(int eflag, int vflag)
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -487,36 +487,23 @@ void PairLJ96Cut::coeff(int narg, char **arg)
 
 void PairLJ96Cut::init_style()
 {
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
+  int respa = 0;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-    int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+  }
 
-    if (respa == 0) irequest = neighbor->request(this,instance_me);
-    else if (respa == 1) {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    } else {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 2;
-      neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    }
+  irequest = neighbor->request(this,instance_me);
 
-  } else irequest = neighbor->request(this,instance_me);
+  if (respa >= 1) {
+    neighbor->requests[irequest]->respaouter = 1;
+    neighbor->requests[irequest]->respainner = 1;
+  }
+  if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
 
   // set rRESPA cutoffs
 
@@ -526,19 +513,6 @@ void PairLJ96Cut::init_style()
   else cut_respa = NULL;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJ96Cut::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/pair_lj96_cut.h b/src/pair_lj96_cut.h
index 6b677c6429cddef5edc69e4f79507d3c7e8009c0..4d6df02127eddfb2611f39adbc8c5458094bc6f7 100644
--- a/src/pair_lj96_cut.h
+++ b/src/pair_lj96_cut.h
@@ -33,7 +33,6 @@ class PairLJ96Cut : public Pair {
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/pair_lj_cut.cpp b/src/pair_lj_cut.cpp
index 7f838061f124f8d8cb5427fc75a62a2b3193241a..215fabecbbca976d2a01c46c163bf2f0aadff734 100644
--- a/src/pair_lj_cut.cpp
+++ b/src/pair_lj_cut.cpp
@@ -156,10 +156,10 @@ void PairLJCut::compute_inner()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -229,10 +229,10 @@ void PairLJCut::compute_middle()
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -315,10 +315,10 @@ void PairLJCut::compute_outer(int eflag, int vflag)
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -481,36 +481,23 @@ void PairLJCut::coeff(int narg, char **arg)
 
 void PairLJCut::init_style()
 {
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
+  int respa = 0;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-    int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+  }
 
-    if (respa == 0) irequest = neighbor->request(this,instance_me);
-    else if (respa == 1) {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    } else {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 2;
-      neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    }
+  irequest = neighbor->request(this,instance_me);
 
-  } else irequest = neighbor->request(this,instance_me);
+  if (respa >= 1) {
+    neighbor->requests[irequest]->respaouter = 1;
+    neighbor->requests[irequest]->respainner = 1;
+  }
+  if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
 
   // set rRESPA cutoffs
 
@@ -520,19 +507,6 @@ void PairLJCut::init_style()
   else cut_respa = NULL;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairLJCut::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/pair_lj_cut.h b/src/pair_lj_cut.h
index 43eeda09cabee69f07c1a3f976c4b30f9aaeeb8f..3724685db6175366ef8de2bb13319efb91f8823a 100644
--- a/src/pair_lj_cut.h
+++ b/src/pair_lj_cut.h
@@ -32,7 +32,6 @@ class PairLJCut : public Pair {
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/pair_mie_cut.cpp b/src/pair_mie_cut.cpp
index 320f21248dbc6dbc1d30ea8294fc61fb7b2c791f..04f8de8d7d335b32aa4c00fe66cc764ea1e3abde 100644
--- a/src/pair_mie_cut.cpp
+++ b/src/pair_mie_cut.cpp
@@ -159,10 +159,10 @@ void PairMIECut::compute_inner()
   double *special_mie = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listinner->inum;
-  ilist = listinner->ilist;
-  numneigh = listinner->numneigh;
-  firstneigh = listinner->firstneigh;
+  inum = list->inum_inner;
+  ilist = list->ilist_inner;
+  numneigh = list->numneigh_inner;
+  firstneigh = list->firstneigh_inner;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
@@ -233,10 +233,10 @@ void PairMIECut::compute_middle()
   double *special_mie = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listmiddle->inum;
-  ilist = listmiddle->ilist;
-  numneigh = listmiddle->numneigh;
-  firstneigh = listmiddle->firstneigh;
+  inum = list->inum_middle;
+  ilist = list->ilist_middle;
+  numneigh = list->numneigh_middle;
+  firstneigh = list->firstneigh_middle;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
@@ -320,10 +320,10 @@ void PairMIECut::compute_outer(int eflag, int vflag)
   double *special_mie = force->special_lj;
   int newton_pair = force->newton_pair;
 
-  inum = listouter->inum;
-  ilist = listouter->ilist;
-  numneigh = listouter->numneigh;
-  firstneigh = listouter->firstneigh;
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
@@ -496,36 +496,23 @@ void PairMIECut::coeff(int narg, char **arg)
 
 void PairMIECut::init_style()
 {
-  // request regular or rRESPA neighbor lists
+  // request regular or rRESPA neighbor list
 
   int irequest;
+  int respa = 0;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
-    int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+  }
 
-    if (respa == 0) irequest = neighbor->request(this,instance_me);
-    else if (respa == 1) {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    } else {
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 1;
-      neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 2;
-      neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this,instance_me);
-      neighbor->requests[irequest]->id = 3;
-      neighbor->requests[irequest]->respaouter = 1;
-    }
+  irequest = neighbor->request(this,instance_me);
 
-  } else irequest = neighbor->request(this,instance_me);
+  if (respa >= 1) {
+    neighbor->requests[irequest]->respaouter = 1;
+    neighbor->requests[irequest]->respainner = 1;
+  }
+  if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
 
   // set rRESPA cutoffs
 
@@ -535,19 +522,6 @@ void PairMIECut::init_style()
   else cut_respa = NULL;
 }
 
-/* ----------------------------------------------------------------------
-   neighbor callback to inform pair style of neighbor list to use
-   regular or rRESPA
-------------------------------------------------------------------------- */
-
-void PairMIECut::init_list(int id, NeighList *ptr)
-{
-  if (id == 0) list = ptr;
-  else if (id == 1) listinner = ptr;
-  else if (id == 2) listmiddle = ptr;
-  else if (id == 3) listouter = ptr;
-}
-
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/pair_mie_cut.h b/src/pair_mie_cut.h
index 2a0a29843e9fc2a201068a4d5c521be7e548cc1e..9e12438d1455c12ba0393ee13379e90c71a56415 100644
--- a/src/pair_mie_cut.h
+++ b/src/pair_mie_cut.h
@@ -32,7 +32,6 @@ class PairMIECut : public Pair {
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();
-  void init_list(int, class NeighList *);
   double init_one(int, int);
   void write_restart(FILE *);
   void read_restart(FILE *);
diff --git a/src/replicate.cpp b/src/replicate.cpp
index e2ed718f652bfbb894ddc484b12c52ad295fef87..f3d196416917cab8be9080243ea9d919abdebe74 100644
--- a/src/replicate.cpp
+++ b/src/replicate.cpp
@@ -74,6 +74,11 @@ void Replicate::command(int narg, char **arg)
   if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store)
     error->all(FLERR,"Cannot replicate with fixes that store atom quantities");
 
+  // Record wall time for atom replication
+
+  MPI_Barrier(world);
+  double time1 = MPI_Wtime();
+
   // maxtag = largest atom tag across all existing atoms
 
   tagint maxtag = 0;
@@ -424,4 +429,16 @@ void Replicate::command(int narg, char **arg)
     Special special(lmp);
     special.build();
   }
+
+  // Wall time
+
+  MPI_Barrier(world);
+  double time2 = MPI_Wtime();
+
+  if (me == 0) {
+    if (screen)
+      fprintf(screen,"  Time spent = %g secs\n",time2-time1);
+    if (logfile)
+      fprintf(logfile,"  Time spent = %g secs\n",time2-time1);
+  }
 }
diff --git a/src/respa.cpp b/src/respa.cpp
index 5d51ff64eef696215eea0498e94610b6eebf7eb5..23cd941834482d3f5047602c8fa98e426f901ba4 100644
--- a/src/respa.cpp
+++ b/src/respa.cpp
@@ -442,6 +442,7 @@ void Respa::setup(int flag)
   domain->box_too_small_check();
   modify->setup_pre_neighbor();
   neighbor->build();
+  modify->setup_post_neighbor();
   neighbor->ncalls = 0;
 
   // compute all forces
@@ -517,6 +518,7 @@ void Respa::setup_minimal(int flag)
     domain->box_too_small_check();
     modify->setup_pre_neighbor();
     neighbor->build();
+    modify->setup_post_neighbor();
     neighbor->ncalls = 0;
   }
 
@@ -668,6 +670,11 @@ void Respa::recurse(int ilevel)
         }
         neighbor->build();
         timer->stamp(Timer::NEIGH);
+        if (modify->n_post_neighbor) {
+          modify->post_neighbor();
+          timer->stamp(Timer::MODIFY);
+        }
+
       } else if (ilevel == 0) {
         timer->stamp();
         comm->forward_comm();
diff --git a/src/verlet.cpp b/src/verlet.cpp
index b242b00722477ff4e9a7a15c921ec50e6793c49b..d74906556b35c32bde5da26114a31cf7c9441b13 100644
--- a/src/verlet.cpp
+++ b/src/verlet.cpp
@@ -90,10 +90,9 @@ void Verlet::setup(int flag)
   if (comm->me == 0 && screen) {
     fprintf(screen,"Setting up Verlet run ...\n");
     if (flag) {
-      fprintf(screen,"  Unit style    : %s\n", update->unit_style);
-      fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n",
-              update->ntimestep);
-      fprintf(screen,"  Time step     : %g\n", update->dt);
+      fprintf(screen,"  Unit style    : %s\n",update->unit_style);
+      fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n",update->ntimestep);
+      fprintf(screen,"  Time step     : %g\n",update->dt);
       timer->print_timeout(screen);
     }
   }
@@ -122,6 +121,7 @@ void Verlet::setup(int flag)
   domain->box_too_small_check();
   modify->setup_pre_neighbor();
   neighbor->build();
+  modify->setup_post_neighbor();
   neighbor->ncalls = 0;
 
   // compute all forces
@@ -183,6 +183,7 @@ void Verlet::setup_minimal(int flag)
     domain->box_too_small_check();
     modify->setup_pre_neighbor();
     neighbor->build();
+    modify->setup_post_neighbor();
     neighbor->ncalls = 0;
   }
 
@@ -227,6 +228,7 @@ void Verlet::run(int n)
   int n_post_integrate = modify->n_post_integrate;
   int n_pre_exchange = modify->n_pre_exchange;
   int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_post_neighbor = modify->n_post_neighbor;
   int n_pre_force = modify->n_pre_force;
   int n_pre_reverse = modify->n_pre_reverse;
   int n_post_force = modify->n_post_force;
@@ -284,6 +286,10 @@ void Verlet::run(int n)
       }
       neighbor->build();
       timer->stamp(Timer::NEIGH);
+      if (n_post_neighbor) {
+        modify->post_neighbor();
+        timer->stamp(Timer::MODIFY);
+      }
     }
 
     // force computations
diff --git a/src/version.h b/src/version.h
index 0c4c4fda625cf8737092e1d15ed289d6e5052016..f9abe92ee4508bbce82c2a97e54bb566d673dad5 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "22 Sep 2017"
+#define LAMMPS_VERSION "23 Oct 2017"
diff --git a/tools/phonon/Makefile b/tools/phonon/Makefile
index 0aacb1e0864e674e98b575599a8aebac9926dd87..67f9b91fdf78298fe1f65ddef7767505ee161993 100644
--- a/tools/phonon/Makefile
+++ b/tools/phonon/Makefile
@@ -1,7 +1,7 @@
 .SUFFIXES : .o .cpp
 # compiler and flags
-CC     = g++ -Wno-unused-result
-LINK   = $(CC) -static
+CC     = g++ -Wall
+LINK   = $(CC)
 CFLAGS = -O3 $(DEBUG) $(UFLAG)
 #
 OFLAGS = -O3 $(DEBUG)
@@ -9,18 +9,17 @@ INC    = $(LPKINC) $(TCINC) $(SPGINC)
 LIB    = $(LPKLIB) $(TCLIB) $(SPGLIB)
 #
 # cLapack library needed
-LPKINC = -I/opt/libs/clapack/3.2.1/include
-LPKLIB = -L/opt/libs/clapack/3.2.1/lib -lclapack -lblas -lf2c #-lm
+LPKINC = 
+LPKLIB =-llapack
 #
-# Tricubic library needed
-TCINC = -I/opt/libs/tricubic/1.0/include
-TCLIB = -L/opt/libs/tricubic/1.0/lib -ltricubic
 #
 # spglib 1.8.2, used to get the irreducible q-points
 # if UFLAG is not set, spglib won't be used.
-UFLAG  = -DUseSPG
-SPGINC = -I/opt/libs/spglib/1.8.2/include
-SPGLIB = -L/opt/libs/spglib/1.8.2/lib -lsymspg
+
+# UFLAG  = -DUseSPG
+# SPGINC = -I/opt/libs/spglib/1.8.2/include
+# SPGLIB = -L/opt/libs/spglib/1.8.2/lib -lsymspg
+
 # if spglib other than version 1.8.2 is used, please 
 # modify file phonon.cpp, instruction can be found by searching 1.8.2
 
@@ -36,7 +35,7 @@ SRC = $(wildcard *.cpp)
 OBJ = $(SRC:.cpp=.o)
 
 #====================================================================
-all:  ver ${EXE}
+all:  ${EXE}
 
 ${EXE}: $(OBJ)
 	$(LINK) $(OFLAGS) $(OBJ) $(LIB) -o $@
@@ -59,3 +58,16 @@ ver:
 	$(CC) $(CFLAGS) -c $<
 .cpp.o:
 	$(CC) $(CFLAGS) $(INC) -c $<
+
+#====================================================================
+# dependencies
+disp.o: disp.cpp phonon.h dynmat.h memory.h interpolate.h green.h timer.h \
+ global.h
+dynmat.o: dynmat.cpp dynmat.h memory.h interpolate.h version.h global.h
+green.o: green.cpp green.h memory.h global.h
+interpolate.o: interpolate.cpp interpolate.h memory.h global.h
+main.o: main.cpp dynmat.h memory.h interpolate.h phonon.h
+memory.o: memory.cpp memory.h
+phonon.o: phonon.cpp phonon.h dynmat.h memory.h interpolate.h green.h \
+ timer.h global.h
+timer.o: timer.cpp timer.h
diff --git a/tools/phonon/README b/tools/phonon/README
index ae6383b6bd2178181a1761726c0200cbd77a6d57..b54d96d8a33300b185410705142ed27ad9db824c 100644
--- a/tools/phonon/README
+++ b/tools/phonon/README
@@ -5,15 +5,9 @@
    analyse the phonon related information.
 #-------------------------------------------------------------------------------
 1. Dependencies
-   The clapack library is needed to solve the eigen problems,
-   which could be downloaded from:
-   http://www.netlib.org/clapack/
-   
-   The tricubic library is also needed to do tricubic interpolations,
-   which could be obtained from:
-      http://orca.princeton.edu/francois/software/tricubic/
-   or
-      http://1drv.ms/1J2WFYk
+   The LAPACK library is needed to solve the eigen problems.
+   http://www.netlib.org/lapack/
+   Intel MKL can be used as well.
    
    The spglib is optionally needed, enabling one to evaluate the
    phonon density of states or vibrational thermal properties
diff --git a/tools/phonon/disp.cpp b/tools/phonon/disp.cpp
index 2fa603916cf8e662b7764f66e9eac17edb44ec46..218e01e7fc07797ba3ac6e1843597e49e52948ca 100644
--- a/tools/phonon/disp.cpp
+++ b/tools/phonon/disp.cpp
@@ -18,7 +18,8 @@ void Phonon::pdisp()
 {
   // ask the output file name and write the header.
   char str[MAXLINE];
-  for (int ii = 0; ii < 80; ++ii) printf("="); printf("\n");
+  for (int ii = 0; ii < 80; ++ii) printf("=");
+  printf("\n");
 #ifdef UseSPG
   // ask method to generate q-lines
   int method = 2;
@@ -53,7 +54,6 @@ void Phonon::pdisp()
     while (1){
       for (int i = 0; i < 3; ++i) qstr[i] = qend[i];
   
-      int quit = 0;
       printf("\nPlease input the start q-point in unit of B1->B3, q to exit [%g %g %g]: ", qstr[0], qstr[1], qstr[2]);
       int n = count_words(fgets(str, MAXLINE, stdin));
       ptr = strtok(str, " \t\n\r\f");
@@ -2844,7 +2844,8 @@ void Phonon::pdisp()
     printf("\nPhonon dispersion data are written to: %s, you can visualize the results\n", fname);
     printf("by invoking: `gnuplot pdisp.gnuplot; gv pdisp.eps`\n");
   }
-  for (int ii = 0; ii < 80; ++ii) printf("="); printf("\n");
+  for (int ii = 0; ii < 80; ++ii) printf("=");
+  printf("\n");
 
   delete []fname;
   nodes.clear();
diff --git a/tools/phonon/dynmat.cpp b/tools/phonon/dynmat.cpp
index e82f473130eb3a63822436c2ca3ae2527e66146e..3b7bfe826853f86c61a65f7becc42787130e2373 100644
--- a/tools/phonon/dynmat.cpp
+++ b/tools/phonon/dynmat.cpp
@@ -3,6 +3,11 @@
 #include "version.h"
 #include "global.h"
 
+extern "C" void zheevd_(char *, char *, long int *, doublecomplex *,
+                       long int *, double *, doublecomplex *,
+                       long int *, double *, long int *, long int *,
+                       long int *, long int *);
+
 // to initialize the class
 DynMat::DynMat(int narg, char **arg)
 {
@@ -81,7 +86,8 @@ DynMat::DynMat(int narg, char **arg)
   printf("Number of atoms per unit cell     : %d\n", nucell);
   printf("System dimension                  : %d\n", sysdim);
   printf("Boltzmann constant in used units  : %g\n", boltz);
-  for (int i = 0; i < 80; ++i) printf("="); printf("\n");
+  for (int i = 0; i < 80; ++i) printf("=");
+  printf("\n");
   if (sysdim < 1||sysdim > 3||nx < 1||ny < 1||nz < 1||nucell < 1){
     printf("Wrong values read from header of file: %s, please check the binary file!\n", binfile);
     fclose(fp); exit(3);
@@ -117,11 +123,11 @@ DynMat::DynMat(int narg, char **arg)
   memory->create(attyp, nucell,         "DynMat:attyp");
   memory->create(M_inv_sqrt, nucell,    "DynMat:M_inv_sqrt");
   
-  if ( fread(&Tmeasure,      sizeof(double), 1,      fp) != 1     ){printf("\nError while reading temperature from file: %s\n",   binfile); fclose(fp); exit(3);}
-  if ( fread(&basevec[0],    sizeof(double), 9,      fp) != 9     ){printf("\nError while reading lattice info from file: %s\n",  binfile); fclose(fp); exit(3);}
-  if ( fread(basis[0],       sizeof(double), fftdim, fp) != fftdim){printf("\nError while reading basis info from file: %s\n",    binfile); fclose(fp); exit(3);}
-  if ( fread(&attyp[0],      sizeof(int),    nucell, fp) != nucell){printf("\nError while reading atom types from file: %s\n",    binfile); fclose(fp); exit(3);}
-  if ( fread(&M_inv_sqrt[0], sizeof(double), nucell, fp) != nucell){printf("\nError while reading atomic masses from file: %s\n", binfile); fclose(fp); exit(3);}
+  if ( (int) fread(&Tmeasure,      sizeof(double), 1,      fp) != 1     ){printf("\nError while reading temperature from file: %s\n",   binfile); fclose(fp); exit(3);}
+  if ( (int) fread(&basevec[0],    sizeof(double), 9,      fp) != 9     ){printf("\nError while reading lattice info from file: %s\n",  binfile); fclose(fp); exit(3);}
+  if ( (int) fread(basis[0],       sizeof(double), fftdim, fp) != fftdim){printf("\nError while reading basis info from file: %s\n",    binfile); fclose(fp); exit(3);}
+  if ( (int) fread(&attyp[0],      sizeof(int),    nucell, fp) != nucell){printf("\nError while reading atom types from file: %s\n",    binfile); fclose(fp); exit(3);}
+  if ( (int) fread(&M_inv_sqrt[0], sizeof(double), nucell, fp) != nucell){printf("\nError while reading atomic masses from file: %s\n", binfile); fclose(fp); exit(3);}
   fclose(fp);
 
   car2dir();
@@ -229,9 +235,9 @@ return;
 int DynMat::geteigen(double *egv, int flag)
 {
   char jobz, uplo;
-  integer n, lda, lwork, lrwork, *iwork, liwork, info;
+  long int n, lda, lwork, lrwork, *iwork, liwork, info;
   doublecomplex *work;
-  doublereal *w = &egv[0], *rwork;
+  double *w = &egv[0], *rwork;
 
   n     = fftdim;
   if (flag) jobz = 'V';
@@ -338,7 +344,8 @@ void DynMat::EnforceASR()
   char *ptr = strtok(str," \t\n\r\f");
   if (ptr) nasr = atoi(ptr);
   if (nasr < 1){
-    for (int i=0; i<80; i++) printf("="); printf("\n");
+    for (int i=0; i<80; i++) printf("=");
+    printf("\n");
     return;
   }
 
@@ -404,7 +411,8 @@ void DynMat::EnforceASR()
     if (i == 99){ printf("...... (%d more skiped)", fftdim-100); break;}
   }
   printf("\n");
-  for (int i = 0; i < 80; ++i) printf("="); printf("\n\n");
+  for (int i = 0; i < 80; ++i) printf("=");
+  printf("\n\n");
 
 return;
 }
@@ -456,7 +464,7 @@ return;
  * --------------------------------------------------------------------*/
 void DynMat::GaussJordan(int n, double *Mat)
 {
-  int i,icol,irow,j,k,l,ll,idr,idc;
+  int i,icol=0,irow=0,j,k,l,ll,idr,idc;
   int *indxc,*indxr,*ipiv;
   double big, nmjk;
   double dum, pivinv;
diff --git a/tools/phonon/dynmat.h b/tools/phonon/dynmat.h
index 1d6e716584e86c8a8fc34407254f8daeec6c1e49..f5bd4010b8739299b219121d4c66f685c883d4ef 100644
--- a/tools/phonon/dynmat.h
+++ b/tools/phonon/dynmat.h
@@ -7,11 +7,6 @@
 #include "memory.h"
 #include "interpolate.h"
 
-extern "C"{
-#include "f2c.h"
-#include "clapack.h"
-}
-
 using namespace std;
 
 class DynMat {
diff --git a/tools/phonon/green.cpp b/tools/phonon/green.cpp
index 8f8946dc4faaaa5ec16d0abda28e7c7abc5f2bf2..35514c03fbb640528c2939cd667c167e9930aaf0 100644
--- a/tools/phonon/green.cpp
+++ b/tools/phonon/green.cpp
@@ -224,7 +224,6 @@ void Green::recursion()
 {
   // local variables
   std::complex<double> Z, rec_x, rec_x_inv;
-  std::complex<double> cunit = std::complex<double>(0.,1.);
 
   double w = wmin;
 
diff --git a/tools/phonon/interpolate.cpp b/tools/phonon/interpolate.cpp
index 8c0cbde1cec52e605f17e0c6b1359334d25d4c91..954062d4159f4dae018a4330b9c5522cb5c674f1 100644
--- a/tools/phonon/interpolate.cpp
+++ b/tools/phonon/interpolate.cpp
@@ -1,7 +1,125 @@
 #include "interpolate.h"
-#include "math.h"
+#include <math.h>
 #include "global.h"
 
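+// ----------------------------------------------------------------------------
+// Embedded tricubic interpolation code: 64x64 coefficient matrix A and helpers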
+static int A[64][64] = {
+{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-3, 3, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 2,-2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 9,-9,-9, 9, 0, 0, 0, 0, 6, 3,-6,-3, 0, 0, 0, 0, 6,-6, 3,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-6, 6, 6,-6, 0, 0, 0, 0,-3,-3, 3, 3, 0, 0, 0, 0,-4, 4,-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-2,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-6, 6, 6,-6, 0, 0, 0, 0,-4,-2, 4, 2, 0, 0, 0, 0,-3, 3,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 4,-4,-4, 4, 0, 0, 0, 0, 2, 2,-2,-2, 0, 0, 0, 0, 2,-2, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,-9,-9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3,-6,-3, 0, 0, 0, 0, 6,-6, 3,-3, 0, 0, 0, 0, 4, 2, 2, 1, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3,-3, 3, 3, 0, 0, 0, 0,-4, 4,-2, 2, 0, 0, 0, 0,-2,-2,-1,-1, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4,-2, 4, 2, 0, 0, 0, 0,-3, 3,-3, 3, 0, 0, 0, 0,-2,-1,-2,-1, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4,-4,-4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,-2,-2, 0, 0, 0, 0, 2,-2, 2,-2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
+{-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 9,-9, 0, 0,-9, 9, 0, 0, 6, 3, 0, 0,-6,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,-6, 0, 0, 3,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-6, 6, 0, 0, 6,-6, 0, 0,-3,-3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 4, 0, 0,-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-2, 0, 0,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0,-1, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,-9, 0, 0,-9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 3, 0, 0,-6,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,-6, 0, 0, 3,-3, 0, 0, 4, 2, 0, 0, 2, 1, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 0, 0, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3,-3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 4, 0, 0,-2, 2, 0, 0,-2,-2, 0, 0,-1,-1, 0, 0},
+{ 9, 0,-9, 0,-9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0,-6, 0,-3, 0, 6, 0,-6, 0, 3, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 9, 0,-9, 0,-9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0,-6, 0,-3, 0, 6, 0,-6, 0, 3, 0,-3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 2, 0, 1, 0},
+{-27,27,27,-27,27,-27,-27,27,-18,-9,18, 9,18, 9,-18,-9,-18,18,-9, 9,18,-18, 9,-9,-18,18,18,-18,-9, 9, 9,-9,-12,-6,-6,-3,12, 6, 6, 3,-12,-6,12, 6,-6,-3, 6, 3,-12,12,-6, 6,-6, 6,-3, 3,-8,-4,-4,-2,-4,-2,-2,-1},
+{18,-18,-18,18,-18,18,18,-18, 9, 9,-9,-9,-9,-9, 9, 9,12,-12, 6,-6,-12,12,-6, 6,12,-12,-12,12, 6,-6,-6, 6, 6, 6, 3, 3,-6,-6,-3,-3, 6, 6,-6,-6, 3, 3,-3,-3, 8,-8, 4,-4, 4,-4, 2,-2, 4, 4, 2, 2, 2, 2, 1, 1},
+{-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0,-3, 0, 3, 0, 3, 0,-4, 0, 4, 0,-2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-2, 0,-1, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0,-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 0,-3, 0, 3, 0, 3, 0,-4, 0, 4, 0,-2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-2, 0,-1, 0,-1, 0},
+{18,-18,-18,18,-18,18,18,-18,12, 6,-12,-6,-12,-6,12, 6, 9,-9, 9,-9,-9, 9,-9, 9,12,-12,-12,12, 6,-6,-6, 6, 6, 3, 6, 3,-6,-3,-6,-3, 8, 4,-8,-4, 4, 2,-4,-2, 6,-6, 6,-6, 3,-3, 3,-3, 4, 2, 4, 2, 2, 1, 2, 1},
+{-12,12,12,-12,12,-12,-12,12,-6,-6, 6, 6, 6, 6,-6,-6,-6, 6,-6, 6, 6,-6, 6,-6,-8, 8, 8,-8,-4, 4, 4,-4,-3,-3,-3,-3, 3, 3, 3, 3,-4,-4, 4, 4,-2,-2, 2, 2,-4, 4,-4, 4,-2, 2,-2, 2,-2,-2,-2,-2,-1,-1,-1,-1},
+{ 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{-6, 6, 0, 0, 6,-6, 0, 0,-4,-2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2,-1, 0, 0,-2,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 4,-4, 0, 0,-4, 4, 0, 0, 2, 2, 0, 0,-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-6, 6, 0, 0, 6,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4,-2, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-3, 3, 0, 0,-3, 3, 0, 0,-2,-1, 0, 0,-2,-1, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4,-4, 0, 0,-4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,-2, 0, 0, 2,-2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0},
+{-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 0,-2, 0, 4, 0, 2, 0,-3, 0, 3, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0,-2, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0,-6, 0, 6, 0, 6, 0,-6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-4, 0,-2, 0, 4, 0, 2, 0,-3, 0, 3, 0,-3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,-2, 0,-1, 0,-2, 0,-1, 0},
+{18,-18,-18,18,-18,18,18,-18,12, 6,-12,-6,-12,-6,12, 6,12,-12, 6,-6,-12,12,-6, 6, 9,-9,-9, 9, 9,-9,-9, 9, 8, 4, 4, 2,-8,-4,-4,-2, 6, 3,-6,-3, 6, 3,-6,-3, 6,-6, 3,-3, 6,-6, 3,-3, 4, 2, 2, 1, 4, 2, 2, 1},
+{-12,12,12,-12,12,-12,-12,12,-6,-6, 6, 6, 6, 6,-6,-6,-8, 8,-4, 4, 8,-8, 4,-4,-6, 6, 6,-6,-6, 6, 6,-6,-4,-4,-2,-2, 4, 4, 2, 2,-3,-3, 3, 3,-3,-3, 3, 3,-4, 4,-2, 2,-4, 4,-2, 2,-2,-2,-1,-1,-2,-2,-1,-1},
+{ 4, 0,-4, 0,-4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,-2, 0,-2, 0, 2, 0,-2, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+{ 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,-4, 0,-4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,-2, 0,-2, 0, 2, 0,-2, 0, 2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+{-12,12,12,-12,12,-12,-12,12,-8,-4, 8, 4, 8, 4,-8,-4,-6, 6,-6, 6, 6,-6, 6,-6,-6, 6, 6,-6,-6, 6, 6,-6,-4,-2,-4,-2, 4, 2, 4, 2,-4,-2, 4, 2,-4,-2, 4, 2,-3, 3,-3, 3,-3, 3,-3, 3,-2,-1,-2,-1,-2,-1,-2,-1},
+{ 8,-8,-8, 8,-8, 8, 8,-8, 4, 4,-4,-4,-4,-4, 4, 4, 4,-4, 4,-4,-4, 4,-4, 4, 4,-4,-4, 4, 4,-4,-4, 4, 2, 2, 2, 2,-2,-2,-2,-2, 2, 2,-2,-2, 2, 2,-2,-2, 2,-2, 2,-2, 2,-2, 2,-2, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+static int ijk2n(int i, int j, int k) {
+  return i + 4*(j + 4*k);  // flatten (i,j,k) into one index: i varies fastest, k slowest
+}
+
+/* ---------------------------------------------------------------------------- */
+
+static void tricubic_get_coeff_stacked(double a[64], double x[64]) {
+  // a = A * x: each a[row] is the dot product of row `row` of the matrix A with x
+  for (int row = 0; row < 64; ++row) {
+    const int *coef = A[row];
+    a[row] = 0.0;
+    for (int col = 0; col < 64; ++col)
+      a[row] += coef[col]*x[col];
+  }
+}
+
+static void tricubic_get_coeff(double a[64], double f[8], double dfdx[8], double dfdy[8], double dfdz[8], double d2fdxdy[8], double d2fdxdz[8], double d2fdydz[8], double d3fdxdydz[8]) {
+  // Concatenate the eight 8-element input arrays into one 64-element vector,
+  // in argument order (f first, d3fdxdydz last), then let
+  // tricubic_get_coeff_stacked() turn that vector into the coefficients a[].
+  const double *blocks[8] = {
+    f, dfdx, dfdy, dfdz, d2fdxdy, d2fdxdz, d2fdydz, d3fdxdydz
+  };
+  double stacked[64];
+  for (int b = 0; b < 8; ++b) {
+    for (int n = 0; n < 8; ++n) {
+      stacked[8*b + n] = blocks[b][n];
+    }
+  }
+  tricubic_get_coeff_stacked(a, stacked);
+}
+
+static double tricubic_eval(double a[64], double x, double y, double z) {
+  /* Evaluate sum_{i,j,k=0..3} a[ijk2n(i,j,k)] * x^i * y^j * z^k, i.e. the
+     function value only (no partial derivatives). Powers are tabulated
+     up front so pow() is called 12 times rather than 192. */
+  double xp[4], yp[4], zp[4];
+  for (int n = 0; n < 4; ++n) {
+    xp[n] = pow(x,n);
+    yp[n] = pow(y,n);
+    zp[n] = pow(z,n);
+  }
+  double ret = 0.0;
+  for (int i = 0; i < 4; ++i)
+    for (int j = 0; j < 4; ++j)
+      for (int k = 0; k < 4; ++k)
+        ret += a[ijk2n(i,j,k)]*xp[i]*yp[j]*zp[k];
+  return ret;
+}
+
 /* ----------------------------------------------------------------------------
  * Constructor used to get info from caller, and prepare other necessary data
  * ---------------------------------------------------------------------------- */
@@ -274,7 +392,8 @@ void Interpolate::set_method()
 
   which =2-im%2;
   printf("Your  selection: %d\n", which);
-  for(int i=0; i<80; i++) printf("="); printf("\n\n");
+  for(int i=0; i<80; i++) printf("=");
+  printf("\n\n");
 
   if (which == 1) tricubic_init();
 
@@ -306,4 +425,3 @@ void Interpolate::reset_gamma()
 
 return;
 }
-/* ---------------------------------------------------------------------------- */
diff --git a/tools/phonon/interpolate.h b/tools/phonon/interpolate.h
index e192fcac87ef6d7b8be3f741075c8db94583ca61..04a358ae711180ca35cbc9bc75f1a6def233185f 100644
--- a/tools/phonon/interpolate.h
+++ b/tools/phonon/interpolate.h
@@ -5,11 +5,8 @@
 #include "stdlib.h"
 #include "string.h"
 #include "memory.h"
-#include <tricubic.h>
-extern "C"{
-#include "f2c.h"
-#include "clapack.h"
-}
+
+extern "C" typedef struct { double r, i; } doublecomplex;
 
 using namespace std;
 
diff --git a/tools/phonon/phonon.cpp b/tools/phonon/phonon.cpp
index 43bea111b426dc23f1c0ce22b59bf14e36d1a145..065885cf3f7a08e0518871b5418369617ed7d3b0 100644
--- a/tools/phonon/phonon.cpp
+++ b/tools/phonon/phonon.cpp
@@ -42,7 +42,8 @@ Phonon::Phonon(DynMat *dm)
     printf("\n");
     for (int i = 0; i < 37; ++i) printf("=");
     printf(" Menu ");
-    for (int i = 0; i < 37; ++i) printf("="); printf("\n");
+    for (int i = 0; i < 37; ++i) printf("=");
+    printf("\n");
     printf("  1. Phonon DOS evaluation;\n");
     printf("  2. Phonon dispersion curves;\n");
     printf("  3. Dynamical matrix at arbitrary q;\n");
@@ -60,7 +61,8 @@ Phonon::Phonon(DynMat *dm)
     printf("Your choice [0]: ");
     if (count_words(fgets(str,MAXLINE,stdin)) > 0) job = atoi(strtok(str," \t\n\r\f"));
     printf("\nYour  selection: %d\n", job);
-    for (int i = 0; i < 80; ++i) printf("=");printf("\n\n");
+    for (int i = 0; i < 80; ++i) printf("=");
+    printf("\n\n");
 
     // now to do the job according to user's choice
     if      (job == 1) pdos();
@@ -414,7 +416,8 @@ void Phonon::vfanyq()
     dynmat->geteigen(egvs, 0);
     printf("q-point: [%lg %lg %lg], ", q[0], q[1], q[2]);
     printf("vibrational frequencies at this q-point:\n");
-    for (int i = 0; i < ndim; ++i) printf("%lg ", egvs[i]); printf("\n\n");
+    for (int i = 0; i < ndim; ++i) printf("%lg ", egvs[i]);
+    printf("\n\n");
   }
 
 return;
@@ -1001,7 +1004,8 @@ void Phonon::ShowCell()
   printf("\n");
   for (int i = 0; i < 30; ++i) printf("=");
   printf("   Unit Cell Info   ");
-  for (int i = 0; i < 30; ++i) printf("="); printf("\n");
+  for (int i = 0; i < 30; ++i) printf("=");
+  printf("\n");
   printf("Number of atoms in the unit cell: %d\n", dynmat->nucell);
   printf("Basis  vectors  of the unit cell:\n");
   printf("  %15.8f  %15.8f  %15.8f\n", dynmat->basevec[0],  dynmat->basevec[1],  dynmat->basevec[2]);
@@ -1091,7 +1095,7 @@ int Phonon::count_words(const char *line)
   strcpy(copy,line);
 
   char *ptr;
-  if (ptr = strchr(copy,'#')) *ptr = '\0';
+  if ((ptr = strchr(copy,'#'))) *ptr = '\0';
 
   if (strtok(copy," \t\n\r\f") == NULL) {
     memory->destroy(copy);
diff --git a/tools/phonon/version.h b/tools/phonon/version.h
index 8ed0e80aa7035405dafcc3c900296a4c6e7e8552..decab631b0a05e0ffdc0cb6092d52d66cd601e82 100644
--- a/tools/phonon/version.h
+++ b/tools/phonon/version.h
@@ -1 +1 @@
-#define VERSION 7
+#define VERSION 8