diff --git a/examples/USER/tally/log.12Jun17.force.1 b/examples/USER/tally/log.12Jun17.force.1
new file mode 100644
index 0000000000000000000000000000000000000000..98b3bad2457c44d328556eecf183f537be2b9b93
--- /dev/null
+++ b/examples/USER/tally/log.12Jun17.force.1
@@ -0,0 +1,177 @@
+LAMMPS (19 May 2017)
+
+units		real
+atom_style	full
+
+read_data	data.spce
+  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  4500 atoms
+  scanning bonds ...
+  2 = max bonds/atom
+  scanning angles ...
+  1 = max angles/atom
+  reading bonds ...
+  3000 bonds
+  reading angles ...
+  1500 angles
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  1 = max # of 1-4 neighbors
+  2 = max # of special neighbors
+
+pair_style	lj/cut/coul/long 12.0 12.0
+kspace_style	pppm 1.0e-4
+
+pair_coeff	1 1 0.15535 3.166
+pair_coeff	* 2 0.0000 0.0000
+
+bond_style	harmonic
+angle_style	harmonic
+dihedral_style	none
+improper_style	none
+
+bond_coeff	1 1000.00 1.000
+angle_coeff	1 100.0 109.47
+
+special_bonds   lj/coul 0.0 0.0 1.0
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  2 = max # of special neighbors
+
+neighbor        2.0 bin
+
+fix		1 all shake 0.0001 20 0 b 1 a 1
+  0 = # of size 2 clusters
+  0 = # of size 3 clusters
+  0 = # of size 4 clusters
+  1500 = # of frozen angles
+fix		2 all nvt temp 300.0 300.0 100.0
+
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 3e-06 on 1 procs for 0 steps with 4500 atoms
+
+
+group		one molecule 1 2
+6 atoms in group one
+
+# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords
+
+# compute pairwise force between two molecules and everybody
+compute		fpa one group/group all pair yes kspace no boundary no
+# tally pairwise force between two molecules and the all molecules
+compute		c1 one force/tally all
+# tally the force of all with all (should be zero)
+compute		c2 all force/tally all
+# collect per atom data. only reduce over the first group.
+compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
+compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
+# determine magnitude of force
+variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
+variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
+# round to 10**-10 absolute precision.
+variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
+variable	all equal round(1e10*c_c2)*1e-10
+
+velocity	all create 300 432567 dist uniform
+
+timestep	2.0
+
+# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
+thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all
+thermo		10
+
+run 50
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 28.47 | 28.47 | 28.47 Mbytes
+Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all 
+       0      22.7331      22.7331      22.7331   -17.068295   -17.068295   -8.8348335   -8.8348334   -12.141369   -12.141369            0            0 
+      10    11.736901    11.736901    11.736901   -3.3897029   -3.3897029    9.1193856    9.1193856   -6.5651786   -6.5651786            0            0 
+      20    5.6120339    5.6120339    5.6120339  -0.60046861  -0.60046861   -4.4481306   -4.4481306    3.3687528    3.3687528            0            0 
+      30     17.29261     17.29261     17.29261     6.179302     6.179302   -10.593979   -10.593979    12.190906    12.190906            0            0 
+      40    18.664433    18.664433    18.664433    5.4727782    5.4727782   -6.9329319   -6.9329319    16.442148    16.442148            0            0 
+      50    12.130407    12.130407    12.130407   -1.0321196   -1.0321196    8.0035558    8.0035558   -9.0567428   -9.0567428            0            0 
+Loop time of 13.9507 on 1 procs for 50 steps with 4500 atoms
+
+Performance: 0.619 ns/day, 38.752 hours/ns, 3.584 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 12.594     | 12.594     | 12.594     |   0.0 | 90.27
+Bond    | 7.3e-05    | 7.3e-05    | 7.3e-05    |   0.0 |  0.00
+Kspace  | 0.56296    | 0.56296    | 0.56296    |   0.0 |  4.04
+Neigh   | 0.65858    | 0.65858    | 0.65858    |   0.0 |  4.72
+Comm    | 0.019093   | 0.019093   | 0.019093   |   0.0 |  0.14
+Output  | 0.055025   | 0.055025   | 0.055025   |   0.0 |  0.39
+Modify  | 0.057276   | 0.057276   | 0.057276   |   0.0 |  0.41
+Other   |            | 0.004003   |            |       |  0.03
+
+Nlocal:    4500 ave 4500 max 4500 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    21131 ave 21131 max 21131 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
+Ave special neighs/atom = 2
+Neighbor list builds = 4
+Dangerous builds = 1
+
+Total wall time: 0:00:15
diff --git a/examples/USER/tally/log.12Jun17.force.4 b/examples/USER/tally/log.12Jun17.force.4
new file mode 100644
index 0000000000000000000000000000000000000000..4238173fb60e3a5eb806da82101b0282c0185cac
--- /dev/null
+++ b/examples/USER/tally/log.12Jun17.force.4
@@ -0,0 +1,177 @@
+LAMMPS (19 May 2017)
+
+units		real
+atom_style	full
+
+read_data	data.spce
+  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
+  2 by 2 by 1 MPI processor grid
+  reading atoms ...
+  4500 atoms
+  scanning bonds ...
+  2 = max bonds/atom
+  scanning angles ...
+  1 = max angles/atom
+  reading bonds ...
+  3000 bonds
+  reading angles ...
+  1500 angles
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  1 = max # of 1-4 neighbors
+  2 = max # of special neighbors
+
+pair_style	lj/cut/coul/long 12.0 12.0
+kspace_style	pppm 1.0e-4
+
+pair_coeff	1 1 0.15535 3.166
+pair_coeff	* 2 0.0000 0.0000
+
+bond_style	harmonic
+angle_style	harmonic
+dihedral_style	none
+improper_style	none
+
+bond_coeff	1 1000.00 1.000
+angle_coeff	1 100.0 109.47
+
+special_bonds   lj/coul 0.0 0.0 1.0
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  2 = max # of special neighbors
+
+neighbor        2.0 bin
+
+fix		1 all shake 0.0001 20 0 b 1 a 1
+  0 = # of size 2 clusters
+  0 = # of size 3 clusters
+  0 = # of size 4 clusters
+  1500 = # of frozen angles
+fix		2 all nvt temp 300.0 300.0 100.0
+
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 4.5e-06 on 4 procs for 0 steps with 4500 atoms
+
+
+group		one molecule 1 2
+6 atoms in group one
+
+# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords
+
+# compute pairwise force between two molecules and everybody
+compute		fpa one group/group all pair yes kspace no boundary no
+# tally pairwise force between two molecules and the all molecules
+compute		c1 one force/tally all
+# tally the force of all with all (should be zero)
+compute		c2 all force/tally all
+# collect per atom data. only reduce over the first group.
+compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
+compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
+# determine magnitude of force
+variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
+variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
+# round to 10**-10 absolute precision.
+variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
+variable	all equal round(1e10*c_c2)*1e-10
+
+velocity	all create 300 432567 dist uniform
+
+timestep	2.0
+
+# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
+thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all
+thermo		10
+
+run 50
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 11.58 | 11.59 | 11.6 Mbytes
+Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all 
+       0      22.7331      22.7331      22.7331   -17.068295   -17.068295   -8.8348335   -8.8348334   -12.141369   -12.141369            0            0 
+      10    11.736901    11.736901    11.736901   -3.3897029   -3.3897029    9.1193856    9.1193856   -6.5651786   -6.5651786            0            0 
+      20    5.6120339    5.6120339    5.6120339  -0.60046861  -0.60046861   -4.4481306   -4.4481306    3.3687528    3.3687528            0            0 
+      30     17.29261     17.29261     17.29261     6.179302     6.179302   -10.593979   -10.593979    12.190906    12.190906            0            0 
+      40    18.664433    18.664433    18.664433    5.4727782    5.4727782   -6.9329319   -6.9329319    16.442148    16.442148            0            0 
+      50    12.130407    12.130407    12.130407   -1.0321196   -1.0321196    8.0035558    8.0035558   -9.0567428   -9.0567428            0            0 
+Loop time of 4.31614 on 4 procs for 50 steps with 4500 atoms
+
+Performance: 2.002 ns/day, 11.989 hours/ns, 11.584 timesteps/s
+31.6% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 3.5075     | 3.6114     | 3.7489     |   4.7 | 83.67
+Bond    | 8.6e-05    | 0.00010525 | 0.000141   |   0.0 |  0.00
+Kspace  | 0.2581     | 0.39489    | 0.49723    |  14.2 |  9.15
+Neigh   | 0.19826    | 0.19888    | 0.19918    |   0.1 |  4.61
+Comm    | 0.034639   | 0.037137   | 0.038938   |   0.9 |  0.86
+Output  | 0.025465   | 0.025997   | 0.027558   |   0.6 |  0.60
+Modify  | 0.044022   | 0.044175   | 0.044407   |   0.1 |  1.02
+Other   |            | 0.003593   |            |       |  0.08
+
+Nlocal:    1125 ave 1148 max 1097 min
+Histogram: 1 0 0 1 0 0 0 0 1 1
+Nghost:    12212.5 ave 12269 max 12162 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Neighs:    650496 ave 675112 max 631353 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
+Ave special neighs/atom = 2
+Neighbor list builds = 4
+Dangerous builds = 1
+
+Total wall time: 0:00:04
diff --git a/examples/USER/tally/log.21Aug15.pe.1 b/examples/USER/tally/log.12Jun17.pe.1
similarity index 50%
rename from examples/USER/tally/log.21Aug15.pe.1
rename to examples/USER/tally/log.12Jun17.pe.1
index aea553003871b4fec713295409636c14f3afb446..8b0f7534144b7b00ad05ec59926250503934a0a9 100644
--- a/examples/USER/tally/log.21Aug15.pe.1
+++ b/examples/USER/tally/log.12Jun17.pe.1
@@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)
 
 units		real
 atom_style	full
@@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
   1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 1e-06 on 1 procs for 0 steps with 4500 atoms
+
+
 group		oxy type 1
 1500 atoms in group oxy
 group		hyd type 2
@@ -88,6 +116,7 @@ thermo		10
 
 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
   G vector (1/distance) = 0.218482
   grid = 15 15 15
   stencil order = 5
@@ -95,38 +124,49 @@ PPPM initialization ...
   estimated relative force accuracy = 9.61968e-05
   using double precision FFTs
   3d grid and FFT values/proc = 8000 3375
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
 Neighbor list info ...
-  2 neighbor list requests
   update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 14
   ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 17.381 Mbytes
-Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair 
-       0   -516632.19   -516632.19    3169.9382    3169.9382    46213.889    46213.889    49383.827    49383.827    49383.827    49383.827 
-      10   -517027.36   -517027.36    3099.1322    3099.1322     45891.84     45891.84    48990.972    48990.972    48990.972    48990.972 
-      20   -516828.06   -516828.06    3101.4321    3101.4321     45884.14     45884.14    48985.572    48985.572    48985.572    48985.572 
-      30    -517032.1    -517032.1    3198.5939    3198.5939    45793.571    45793.571    48992.165    48992.165    48992.165    48992.165 
-      40   -517095.56   -517095.56    3244.0797    3244.0797    45715.265    45715.265    48959.345    48959.345    48959.345    48959.345 
-      50   -517273.54   -517273.54    3274.9142    3274.9142    45665.997    45665.997    48940.911    48940.911    48940.911    48940.911 
-
-Loop time of 4.31105 on 1 procs for 50 steps with 4500 atoms
-100.1% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.004 ns/day  11.975 hours/ns  11.598 timesteps/s
-
-MPI task timings breakdown:
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 29.08 | 29.08 | 29.08 Mbytes
+Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair 
+       0   -516634.27   -516634.27    3169.9427    3169.9427    46212.482    46212.482    49382.425    49382.425    49382.425    49382.425 
+      10   -517027.35   -517027.35    3099.1374    3099.1374    45891.866    45891.866    48991.003    48991.003    48991.003    48991.003 
+      20   -516828.05   -516828.05    3101.4373    3101.4373    45884.156    45884.156    48985.594    48985.594    48985.594    48985.594 
+      30   -517032.07   -517032.07    3198.5951    3198.5951    45793.595    45793.595    48992.191    48992.191    48992.191    48992.191 
+      40   -517095.54   -517095.54    3244.0771    3244.0771    45715.292    45715.292    48959.369    48959.369    48959.369    48959.369 
+      50    -517273.5    -517273.5    3274.9097    3274.9097    45666.025    45666.025    48940.935    48940.935    48940.935    48940.935 
+Loop time of 15.3339 on 1 procs for 50 steps with 4500 atoms
+
+Performance: 0.563 ns/day, 42.594 hours/ns, 3.261 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.5071     | 3.5071     | 3.5071     |   0.0 | 81.35
-Bond    | 0.00025034 | 0.00025034 | 0.00025034 |   0.0 |  0.01
-Kspace  | 0.19991    | 0.19991    | 0.19991    |   0.0 |  4.64
-Neigh   | 0.31459    | 0.31459    | 0.31459    |   0.0 |  7.30
-Comm    | 0.010338   | 0.010338   | 0.010338   |   0.0 |  0.24
-Output  | 0.24722    | 0.24722    | 0.24722    |   0.0 |  5.73
-Modify  | 0.029466   | 0.029466   | 0.029466   |   0.0 |  0.68
-Other   |            | 0.002182   |            |       |  0.05
+Pair    | 13.432     | 13.432     | 13.432     |   0.0 | 87.60
+Bond    | 0.000365   | 0.000365   | 0.000365   |   0.0 |  0.00
+Kspace  | 0.581      | 0.581      | 0.581      |   0.0 |  3.79
+Neigh   | 0.66081    | 0.66081    | 0.66081    |   0.0 |  4.31
+Comm    | 0.019908   | 0.019908   | 0.019908   |   0.0 |  0.13
+Output  | 0.57731    | 0.57731    | 0.57731    |   0.0 |  3.76
+Modify  | 0.058515   | 0.058515   | 0.058515   |   0.0 |  0.38
+Other   |            | 0.003889   |            |       |  0.03
 
 Nlocal:    4500 ave 4500 max 4500 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -135,10 +175,10 @@ Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
 
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
 Ave special neighs/atom = 2
 Neighbor list builds = 4
 Dangerous builds = 1
 
-Total wall time: 0:00:04
+Total wall time: 0:00:16
diff --git a/examples/USER/tally/log.21Aug15.pe.4 b/examples/USER/tally/log.12Jun17.pe.4
similarity index 50%
rename from examples/USER/tally/log.21Aug15.pe.4
rename to examples/USER/tally/log.12Jun17.pe.4
index 303af555ff2655073827812bfdeab7e737731b39..f684fabe01032051684737adf1241a6b0471a78a 100644
--- a/examples/USER/tally/log.21Aug15.pe.4
+++ b/examples/USER/tally/log.12Jun17.pe.4
@@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)
 
 units		real
 atom_style	full
@@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
   1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 1.75e-06 on 4 procs for 0 steps with 4500 atoms
+
+
 group		oxy type 1
 1500 atoms in group oxy
 group		hyd type 2
@@ -88,6 +116,7 @@ thermo		10
 
 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
   G vector (1/distance) = 0.218482
   grid = 15 15 15
   stencil order = 5
@@ -95,38 +124,49 @@ PPPM initialization ...
   estimated relative force accuracy = 9.61968e-05
   using double precision FFTs
   3d grid and FFT values/proc = 3380 960
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
 Neighbor list info ...
-  2 neighbor list requests
   update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
   master list distance cutoff = 14
   ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 8.44413 Mbytes
-Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair 
-       0   -516632.19   -516632.19    3169.9382    3169.9382    46213.889    46213.889    49383.827    49383.827    49383.827    49383.827 
-      10   -517027.36   -517027.36    3099.1322    3099.1322     45891.84     45891.84    48990.972    48990.972    48990.972    48990.972 
-      20   -516828.06   -516828.06    3101.4321    3101.4321     45884.14     45884.14    48985.572    48985.572    48985.572    48985.572 
-      30    -517032.1    -517032.1    3198.5939    3198.5939    45793.571    45793.571    48992.165    48992.165    48992.165    48992.165 
-      40   -517095.56   -517095.56    3244.0797    3244.0797    45715.265    45715.265    48959.345    48959.345    48959.345    48959.345 
-      50   -517273.54   -517273.54    3274.9142    3274.9142    45665.997    45665.997    48940.911    48940.911    48940.911    48940.911 
-
-Loop time of 1.20533 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.168 ns/day  3.348 hours/ns  41.482 timesteps/s
-
-MPI task timings breakdown:
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 11.86 | 11.87 | 11.88 Mbytes
+Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair 
+       0   -516634.27   -516634.27    3169.9427    3169.9427    46212.482    46212.482    49382.425    49382.425    49382.425    49382.425 
+      10   -517027.35   -517027.35    3099.1374    3099.1374    45891.866    45891.866    48991.003    48991.003    48991.003    48991.003 
+      20   -516828.05   -516828.05    3101.4373    3101.4373    45884.156    45884.156    48985.594    48985.594    48985.594    48985.594 
+      30   -517032.07   -517032.07    3198.5951    3198.5951    45793.595    45793.595    48992.191    48992.191    48992.191    48992.191 
+      40   -517095.54   -517095.54    3244.0771    3244.0771    45715.292    45715.292    48959.369    48959.369    48959.369    48959.369 
+      50    -517273.5    -517273.5    3274.9097    3274.9097    45666.025    45666.025    48940.935    48940.935    48940.935    48940.935 
+Loop time of 2.32344 on 4 procs for 50 steps with 4500 atoms
+
+Performance: 3.719 ns/day, 6.454 hours/ns, 21.520 timesteps/s
+64.0% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.87053    | 0.90325    | 0.94364    |   2.8 | 74.94
-Bond    | 0.00015402 | 0.00018191 | 0.00020909 |   0.2 |  0.02
-Kspace  | 0.061657   | 0.10164    | 0.13394    |   8.4 |  8.43
-Neigh   | 0.088292   | 0.088332   | 0.088373   |   0.0 |  7.33
-Comm    | 0.017319   | 0.017806   | 0.018291   |   0.4 |  1.48
-Output  | 0.07067    | 0.070706   | 0.070813   |   0.0 |  5.87
-Modify  | 0.021655   | 0.021694   | 0.02173    |   0.0 |  1.80
-Other   |            | 0.001719   |            |       |  0.14
+Pair    | 1.5561     | 1.8883     | 2.0327     |  14.1 | 81.27
+Bond    | 8.8e-05    | 0.000116   | 0.000135   |   0.0 |  0.00
+Kspace  | 0.094718   | 0.1933     | 0.26055    |  14.1 |  8.32
+Neigh   | 0.085117   | 0.1073     | 0.1147     |   3.9 |  4.62
+Comm    | 0.014156   | 0.017907   | 0.020005   |   1.8 |  0.77
+Output  | 0.071634   | 0.090599   | 0.097665   |   3.6 |  3.90
+Modify  | 0.019447   | 0.024101   | 0.026277   |   1.8 |  1.04
+Other   |            | 0.001804   |            |       |  0.08
 
 Nlocal:    1125 ave 1148 max 1097 min
 Histogram: 1 0 0 1 0 0 0 0 1 1
@@ -135,10 +175,10 @@ Histogram: 1 0 0 1 0 1 0 0 0 1
 Neighs:    650496 ave 675112 max 631353 min
 Histogram: 1 0 0 1 1 0 0 0 0 1
 
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
 Ave special neighs/atom = 2
 Neighbor list builds = 4
 Dangerous builds = 1
 
-Total wall time: 0:00:01
+Total wall time: 0:00:02
diff --git a/examples/USER/tally/log.21Aug15.stress.1 b/examples/USER/tally/log.12Jun17.stress.1
similarity index 56%
rename from examples/USER/tally/log.21Aug15.stress.1
rename to examples/USER/tally/log.12Jun17.stress.1
index c20b51559655d55f10d69024485f95f791214975..a76012487cca3c6d73f4331fc27f62165d326c8d 100644
--- a/examples/USER/tally/log.21Aug15.stress.1
+++ b/examples/USER/tally/log.12Jun17.stress.1
@@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)
 
 units		real
 atom_style	full
@@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
   1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 2e-06 on 1 procs for 0 steps with 4500 atoms
+
+
 group		one molecule 1 2
 6 atoms in group one
 
@@ -79,6 +107,7 @@ thermo		10
 
 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
   G vector (1/distance) = 0.218482
   grid = 15 15 15
   stencil order = 5
@@ -86,38 +115,32 @@ PPPM initialization ...
   estimated relative force accuracy = 9.61968e-05
   using double precision FFTs
   3d grid and FFT values/proc = 8000 3375
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-Neighbor list info ...
-  1 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 24.631 Mbytes
-Step press spa press one ref 
-       0    26497.547    26497.547    26497.547   -2357033.6   -2357033.6 
-      10    23665.073    23665.073    23665.073   -2096057.3   -2096057.3 
-      20    23338.149    23338.149    23338.149     -2034283     -2034283 
-      30      25946.4      25946.4      25946.4     -2002817     -2002817 
-      40    27238.349    27238.349    27238.349   -2155411.5   -2155411.5 
-      50    27783.092    27783.092    27783.092   -1862190.3   -1862190.3 
-
-Loop time of 4.15609 on 1 procs for 50 steps with 4500 atoms
-100.1% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.079 ns/day  11.545 hours/ns  12.031 timesteps/s
-
-MPI task timings breakdown:
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+Per MPI rank memory allocation (min/avg/max) = 35.9 | 35.9 | 35.9 Mbytes
+Step c_press v_spa v_press v_one v_ref 
+       0    26496.811    26496.811    26496.811   -2356992.7   -2356992.7 
+      10    23665.129    23665.129    23665.129     -2096059     -2096059 
+      20    23338.197    23338.197    23338.197   -2034284.1   -2034284.1 
+      30    25946.434    25946.434    25946.434   -2002815.3   -2002815.3 
+      40    27238.374    27238.374    27238.374   -2155408.7   -2155408.7 
+      50    27783.107    27783.107    27783.107   -1862191.5   -1862191.5 
+Loop time of 14.2089 on 1 procs for 50 steps with 4500 atoms
+
+Performance: 0.608 ns/day, 39.469 hours/ns, 3.519 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.6444     | 3.6444     | 3.6444     |   0.0 | 87.69
-Bond    | 0.0016105  | 0.0016105  | 0.0016105  |   0.0 |  0.04
-Kspace  | 0.22345    | 0.22345    | 0.22345    |   0.0 |  5.38
-Neigh   | 0.23588    | 0.23588    | 0.23588    |   0.0 |  5.68
-Comm    | 0.010035   | 0.010035   | 0.010035   |   0.0 |  0.24
-Output  | 0.0084085  | 0.0084085  | 0.0084085  |   0.0 |  0.20
-Modify  | 0.029978   | 0.029978   | 0.029978   |   0.0 |  0.72
-Other   |            | 0.002368   |            |       |  0.06
+Pair    | 12.983     | 12.983     | 12.983     |   0.0 | 91.37
+Bond    | 0.002788   | 0.002788   | 0.002788   |   0.0 |  0.02
+Kspace  | 0.62745    | 0.62745    | 0.62745    |   0.0 |  4.42
+Neigh   | 0.49839    | 0.49839    | 0.49839    |   0.0 |  3.51
+Comm    | 0.018597   | 0.018597   | 0.018597   |   0.0 |  0.13
+Output  | 0.015852   | 0.015852   | 0.015852   |   0.0 |  0.11
+Modify  | 0.058415   | 0.058415   | 0.058415   |   0.0 |  0.41
+Other   |            | 0.004126   |            |       |  0.03
 
 Nlocal:    4500 ave 4500 max 4500 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@@ -132,4 +155,4 @@ Ave special neighs/atom = 2
 Neighbor list builds = 3
 Dangerous builds = 0
 
-Total wall time: 0:00:04
+Total wall time: 0:00:15
diff --git a/examples/USER/tally/log.21Aug15.stress.4 b/examples/USER/tally/log.12Jun17.stress.4
similarity index 55%
rename from examples/USER/tally/log.21Aug15.stress.4
rename to examples/USER/tally/log.12Jun17.stress.4
index c681960c99d4759c302907fbb07791281d6c48d9..37bb60f013ee2b589a569a11a65139b6cc0a3f10 100644
--- a/examples/USER/tally/log.21Aug15.stress.4
+++ b/examples/USER/tally/log.12Jun17.stress.4
@@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)
 
 units		real
 atom_style	full
@@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
   1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0
 
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 4e-06 on 4 procs for 0 steps with 4500 atoms
+
+
 group		one molecule 1 2
 6 atoms in group one
 
@@ -79,6 +107,7 @@ thermo		10
 
 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
   G vector (1/distance) = 0.218482
   grid = 15 15 15
   stencil order = 5
@@ -86,44 +115,38 @@ PPPM initialization ...
   estimated relative force accuracy = 9.61968e-05
   using double precision FFTs
   3d grid and FFT values/proc = 3380 960
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-Neighbor list info ...
-  1 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 12.0691 Mbytes
-Step press spa press one ref 
-       0    26497.547    26497.547    26497.547   -2357033.6   -2357033.6 
-      10    23665.073    23665.073    23665.073   -2096057.3   -2096057.3 
-      20    23338.149    23338.149    23338.149     -2034283     -2034283 
-      30      25946.4      25946.4      25946.4     -2002817     -2002817 
-      40    27238.349    27238.349    27238.349   -2155411.5   -2155411.5 
-      50    27783.092    27783.092    27783.092   -1862190.3   -1862190.3 
-
-Loop time of 1.17266 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.368 ns/day  3.257 hours/ns  42.638 timesteps/s
-
-MPI task timings breakdown:
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+Per MPI rank memory allocation (min/avg/max) = 15.25 | 15.26 | 15.27 Mbytes
+Step c_press v_spa v_press v_one v_ref 
+       0    26496.811    26496.811    26496.811   -2356992.7   -2356992.7 
+      10    23665.129    23665.129    23665.129     -2096059     -2096059 
+      20    23338.197    23338.197    23338.197   -2034284.1   -2034284.1 
+      30    25946.434    25946.434    25946.434   -2002815.3   -2002815.3 
+      40    27238.374    27238.374    27238.374   -2155408.7   -2155408.7 
+      50    27783.107    27783.107    27783.107   -1862191.5   -1862191.5 
+Loop time of 4.32017 on 4 procs for 50 steps with 4500 atoms
+
+Performance: 2.000 ns/day, 12.000 hours/ns, 11.574 timesteps/s
+31.8% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.89832    | 0.93222    | 0.98611    |   3.4 | 79.50
-Bond    | 0.00081754 | 0.00096095 | 0.0011327  |   0.4 |  0.08
-Kspace  | 0.068058   | 0.12154    | 0.15522    |   9.4 | 10.36
-Neigh   | 0.065756   | 0.065785   | 0.065824   |   0.0 |  5.61
-Comm    | 0.017489   | 0.017982   | 0.018623   |   0.4 |  1.53
-Output  | 0.010985   | 0.011017   | 0.011111   |   0.1 |  0.94
-Modify  | 0.021429   | 0.021491   | 0.021551   |   0.0 |  1.83
-Other   |            | 0.001671   |            |       |  0.14
+Pair    | 3.5816     | 3.6917     | 3.839      |   4.9 | 85.45
+Bond    | 0.001579   | 0.0016563  | 0.001709   |   0.1 |  0.04
+Kspace  | 0.22505    | 0.3716     | 0.48023    |  15.3 |  8.60
+Neigh   | 0.14558    | 0.14568    | 0.14575    |   0.0 |  3.37
+Comm    | 0.032009   | 0.03441    | 0.036274   |   0.8 |  0.80
+Output  | 0.02253    | 0.023115   | 0.024844   |   0.7 |  0.54
+Modify  | 0.046954   | 0.047086   | 0.047132   |   0.0 |  1.09
+Other   |            | 0.004935   |            |       |  0.11
 
 Nlocal:    1125 ave 1154 max 1092 min
 Histogram: 1 0 0 0 1 0 1 0 0 1
 Nghost:    12263.5 ave 12300 max 12219 min
 Histogram: 1 0 1 0 0 0 0 0 0 2
-Neighs:    650438 ave 678786 max 626279 min
+Neighs:    650438 ave 678787 max 626279 min
 Histogram: 1 0 0 1 1 0 0 0 0 1
 
 Total # of neighbors = 2601750
@@ -132,4 +155,4 @@ Ave special neighs/atom = 2
 Neighbor list builds = 3
 Dangerous builds = 0
 
-Total wall time: 0:00:01
+Total wall time: 0:00:04
diff --git a/examples/USER/tally/log.21Aug15.force.1 b/examples/USER/tally/log.21Aug15.force.1
deleted file mode 100644
index 8e7bdb9520deef2758c2a556edb69f5483e1604d..0000000000000000000000000000000000000000
--- a/examples/USER/tally/log.21Aug15.force.1
+++ /dev/null
@@ -1,136 +0,0 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
-
-units		real
-atom_style	full
-
-read_data	data.spce
-  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
-  1 by 1 by 1 MPI processor grid
-  reading atoms ...
-  4500 atoms
-  scanning bonds ...
-  2 = max bonds/atom
-  scanning angles ...
-  1 = max angles/atom
-  reading bonds ...
-  3000 bonds
-  reading angles ...
-  1500 angles
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  1 = max # of 1-4 neighbors
-  2 = max # of special neighbors
-
-pair_style	lj/cut/coul/long 12.0 12.0
-kspace_style	pppm 1.0e-4
-
-pair_coeff	1 1 0.15535 3.166
-pair_coeff	* 2 0.0000 0.0000
-
-bond_style	harmonic
-angle_style	harmonic
-dihedral_style	none
-improper_style	none
-
-bond_coeff	1 1000.00 1.000
-angle_coeff	1 100.0 109.47
-
-special_bonds   lj/coul 0.0 0.0 1.0
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  2 = max # of special neighbors
-
-neighbor        2.0 bin
-
-fix		1 all shake 0.0001 20 0 b 1 a 1
-  0 = # of size 2 clusters
-  0 = # of size 3 clusters
-  0 = # of size 4 clusters
-  1500 = # of frozen angles
-fix		2 all nvt temp 300.0 300.0 100.0
-
-group		one molecule 1 2
-6 atoms in group one
-
-# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords
-
-# compute pairwise force between two molecules and everybody
-compute		fpa one group/group all pair yes kspace no boundary no
-# tally pairwise force between two molecules and the all molecules
-compute		c1 one force/tally all
-# tally the force of all with all (should be zero)
-compute		c2 all force/tally all
-# collect per atom data. only reduce over the first group.
-compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
-compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
-# determine magnitude of force
-variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
-variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
-# round to 10**-10 absolute precision.
-variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
-
-velocity	all create 300 432567 dist uniform
-
-timestep	2.0
-
-# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
-thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref  c_c2
-thermo		10
-
-run 50
-PPPM initialization ...
-  G vector (1/distance) = 0.218482
-  grid = 15 15 15
-  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0319435
-  estimated relative force accuracy = 9.61968e-05
-  using double precision FFTs
-  3d grid and FFT values/proc = 8000 3375
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-Neighbor list info ...
-  2 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 16.7648 Mbytes
-Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 
-       0    22.732789    22.732789    22.732789   -17.068392   -17.068392   -8.8345214   -8.8345214   -12.140878   -12.140878            0            0 
-      10    11.736915    11.736915    11.736915   -3.3898298   -3.3898298     9.119272     9.119272   -6.5652948   -6.5652948            0            0 
-      20    5.6119761    5.6119761    5.6119761  -0.60028931  -0.60028931   -4.4479886   -4.4479886     3.368876     3.368876            0            0 
-      30    17.292617    17.292617    17.292617    6.1793856    6.1793856   -10.593927   -10.593927    12.190919    12.190919            0            0 
-      40    18.664226    18.664226    18.664226    5.4725079    5.4725079    -6.933046    -6.933046    16.441955    16.441955            0            0 
-      50    12.130282    12.130282    12.130282   -1.0321244   -1.0321244    8.0032646    8.0032646   -9.0568326   -9.0568326            0            0 
-
-Loop time of 4.11825 on 1 procs for 50 steps with 4500 atoms
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.098 ns/day  11.440 hours/ns  12.141 timesteps/s
-
-MPI task timings breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 3.5286     | 3.5286     | 3.5286     |   0.0 | 85.68
-Bond    | 6.1274e-05 | 6.1274e-05 | 6.1274e-05 |   0.0 |  0.00
-Kspace  | 0.1937     | 0.1937     | 0.1937     |   0.0 |  4.70
-Neigh   | 0.31454    | 0.31454    | 0.31454    |   0.0 |  7.64
-Comm    | 0.01037    | 0.01037    | 0.01037    |   0.0 |  0.25
-Output  | 0.039355   | 0.039355   | 0.039355   |   0.0 |  0.96
-Modify  | 0.029273   | 0.029273   | 0.029273   |   0.0 |  0.71
-Other   |            | 0.002351   |            |       |  0.06
-
-Nlocal:    4500 ave 4500 max 4500 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    21131 ave 21131 max 21131 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
-Ave special neighs/atom = 2
-Neighbor list builds = 4
-Dangerous builds = 1
-
-Total wall time: 0:00:04
diff --git a/examples/USER/tally/log.21Aug15.force.4 b/examples/USER/tally/log.21Aug15.force.4
deleted file mode 100644
index 17cbc50f02fba59ef993419f88e91428ac824653..0000000000000000000000000000000000000000
--- a/examples/USER/tally/log.21Aug15.force.4
+++ /dev/null
@@ -1,136 +0,0 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
-
-units		real
-atom_style	full
-
-read_data	data.spce
-  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
-  2 by 2 by 1 MPI processor grid
-  reading atoms ...
-  4500 atoms
-  scanning bonds ...
-  2 = max bonds/atom
-  scanning angles ...
-  1 = max angles/atom
-  reading bonds ...
-  3000 bonds
-  reading angles ...
-  1500 angles
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  1 = max # of 1-4 neighbors
-  2 = max # of special neighbors
-
-pair_style	lj/cut/coul/long 12.0 12.0
-kspace_style	pppm 1.0e-4
-
-pair_coeff	1 1 0.15535 3.166
-pair_coeff	* 2 0.0000 0.0000
-
-bond_style	harmonic
-angle_style	harmonic
-dihedral_style	none
-improper_style	none
-
-bond_coeff	1 1000.00 1.000
-angle_coeff	1 100.0 109.47
-
-special_bonds   lj/coul 0.0 0.0 1.0
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  2 = max # of special neighbors
-
-neighbor        2.0 bin
-
-fix		1 all shake 0.0001 20 0 b 1 a 1
-  0 = # of size 2 clusters
-  0 = # of size 3 clusters
-  0 = # of size 4 clusters
-  1500 = # of frozen angles
-fix		2 all nvt temp 300.0 300.0 100.0
-
-group		one molecule 1 2
-6 atoms in group one
-
-# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords
-
-# compute pairwise force between two molecules and everybody
-compute		fpa one group/group all pair yes kspace no boundary no
-# tally pairwise force between two molecules and the all molecules
-compute		c1 one force/tally all
-# tally the force of all with all (should be zero)
-compute		c2 all force/tally all
-# collect per atom data. only reduce over the first group.
-compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
-compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
-# determine magnitude of force
-variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
-variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
-# round to 10**-10 absolute precision.
-variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
-
-velocity	all create 300 432567 dist uniform
-
-timestep	2.0
-
-# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
-thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref  c_c2
-thermo		10
-
-run 50
-PPPM initialization ...
-  G vector (1/distance) = 0.218482
-  grid = 15 15 15
-  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0319435
-  estimated relative force accuracy = 9.61968e-05
-  using double precision FFTs
-  3d grid and FFT values/proc = 3380 960
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-Neighbor list info ...
-  2 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 8.16441 Mbytes
-Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 
-       0    22.732789    22.732789    22.732789   -17.068392   -17.068392   -8.8345214   -8.8345214   -12.140878   -12.140878            0            0 
-      10    11.736915    11.736915    11.736915   -3.3898298   -3.3898298     9.119272     9.119272   -6.5652948   -6.5652948            0            0 
-      20    5.6119761    5.6119761    5.6119761  -0.60028931  -0.60028931   -4.4479886   -4.4479886     3.368876     3.368876            0            0 
-      30    17.292617    17.292617    17.292617    6.1793856    6.1793856   -10.593927   -10.593927    12.190919    12.190919            0            0 
-      40    18.664226    18.664226    18.664226    5.4725079    5.4725079    -6.933046    -6.933046    16.441955    16.441955            0            0 
-      50    12.130282    12.130282    12.130282   -1.0321244   -1.0321244    8.0032646    8.0032646   -9.0568326   -9.0568326            0            0 
-
-Loop time of 1.13658 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.602 ns/day  3.157 hours/ns  43.991 timesteps/s
-
-MPI task timings breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.85795    | 0.89088    | 0.93636    |   3.0 | 78.38
-Bond    | 3.4571e-05 | 4.4644e-05 | 5.4598e-05 |   0.1 |  0.00
-Kspace  | 0.059847   | 0.1051     | 0.1384     |   8.9 |  9.25
-Neigh   | 0.085891   | 0.085954   | 0.086      |   0.0 |  7.56
-Comm    | 0.01758    | 0.018091   | 0.019178   |   0.5 |  1.59
-Output  | 0.013697   | 0.013725   | 0.013805   |   0.0 |  1.21
-Modify  | 0.021068   | 0.021137   | 0.021205   |   0.0 |  1.86
-Other   |            | 0.001656   |            |       |  0.15
-
-Nlocal:    1125 ave 1148 max 1097 min
-Histogram: 1 0 0 1 0 0 0 0 1 1
-Nghost:    12212.5 ave 12269 max 12162 min
-Histogram: 1 0 0 1 0 1 0 0 0 1
-Neighs:    650496 ave 675112 max 631353 min
-Histogram: 1 0 0 1 1 0 0 0 0 1
-
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
-Ave special neighs/atom = 2
-Neighbor list builds = 4
-Dangerous builds = 1
-
-Total wall time: 0:00:01