diff --git a/bench/log.15Feb16.chain.fixed.icc.1 b/bench/log.15Feb16.chain.fixed.icc.1
new file mode 100644
index 0000000000000000000000000000000000000000..553241bf228f9d75fecf4648baa018251f9fd808
--- /dev/null
+++ b/bench/log.15Feb16.chain.fixed.icc.1
@@ -0,0 +1,78 @@
+LAMMPS (15 Feb 2016)
+# FENE beadspring benchmark
+
+units		lj
+atom_style	bond
+special_bonds   fene
+
+read_data	data.chain
+  orthogonal box = (-16.796 -16.796 -16.796) to (16.796 16.796 16.796)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  1 = max bonds/atom
+  reading bonds ...
+  31680 bonds
+  2 = max # of 1-2 neighbors
+  2 = max # of special neighbors
+
+neighbor	0.4 bin
+neigh_modify	every 1 delay 1
+
+bond_style      fene
+bond_coeff	1 30.0 1.5 1.0 1.0
+
+pair_style	lj/cut 1.12
+pair_modify	shift yes
+pair_coeff	1 1 1.0 1.0 1.12
+
+fix		1 all nve
+fix		2 all langevin 1.0 1.0 10.0 904297
+
+thermo          100
+timestep	0.012
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 1 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.52
+  ghost atom cutoff = 1.52
+  binsize = 0.76 -> bins = 45 45 45
+Memory usage per processor = 11.5189 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0   0.97029772   0.44484087    20.494523    22.394765    4.6721833 
+     100    0.9729966    0.4361122    20.507698     22.40326    4.6548819 
+Loop time of 0.978585 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 105948.895 tau/day, 102.188 timesteps/s
+100.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.19562    | 0.19562    | 0.19562    |   0.0 | 19.99
+Bond    | 0.087475   | 0.087475   | 0.087475   |   0.0 |  8.94
+Neigh   | 0.44861    | 0.44861    | 0.44861    |   0.0 | 45.84
+Comm    | 0.032932   | 0.032932   | 0.032932   |   0.0 |  3.37
+Output  | 0.00010395 | 0.00010395 | 0.00010395 |   0.0 |  0.01
+Modify  | 0.19413    | 0.19413    | 0.19413    |   0.0 | 19.84
+Other   |            | 0.01972    |            |       |  2.02
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    9493 ave 9493 max 9493 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    155873 ave 155873 max 155873 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 155873
+Ave neighs/atom = 4.87103
+Ave special neighs/atom = 1.98
+Neighbor list builds = 25
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/bench/log.15Feb16.chain.fixed.icc.4 b/bench/log.15Feb16.chain.fixed.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..1ff445b27a153b43e0fb97a7f8cb6f6c1721a4e8
--- /dev/null
+++ b/bench/log.15Feb16.chain.fixed.icc.4
@@ -0,0 +1,78 @@
+LAMMPS (15 Feb 2016)
+# FENE beadspring benchmark
+
+units		lj
+atom_style	bond
+special_bonds   fene
+
+read_data	data.chain
+  orthogonal box = (-16.796 -16.796 -16.796) to (16.796 16.796 16.796)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  1 = max bonds/atom
+  reading bonds ...
+  31680 bonds
+  2 = max # of 1-2 neighbors
+  2 = max # of special neighbors
+
+neighbor	0.4 bin
+neigh_modify	every 1 delay 1
+
+bond_style      fene
+bond_coeff	1 30.0 1.5 1.0 1.0
+
+pair_style	lj/cut 1.12
+pair_modify	shift yes
+pair_coeff	1 1 1.0 1.0 1.12
+
+fix		1 all nve
+fix		2 all langevin 1.0 1.0 10.0 904297
+
+thermo          100
+timestep	0.012
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 1 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.52
+  ghost atom cutoff = 1.52
+  binsize = 0.76 -> bins = 45 45 45
+Memory usage per processor = 3.91518 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0   0.97029772   0.44484087    20.494523    22.394765    4.6721833 
+     100   0.97145835   0.43803883    20.502691    22.397872     4.626988 
+Loop time of 0.271187 on 4 procs for 100 steps with 32000 atoms
+
+Performance: 382319.453 tau/day, 368.749 timesteps/s
+99.6% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.048621   | 0.050076   | 0.051229   |   0.4 | 18.47
+Bond    | 0.022254   | 0.022942   | 0.023567   |   0.3 |  8.46
+Neigh   | 0.11873    | 0.11881    | 0.11887    |   0.0 | 43.81
+Comm    | 0.019066   | 0.021357   | 0.024297   |   1.3 |  7.88
+Output  | 5.0068e-05 | 5.5015e-05 | 6.1035e-05 |   0.1 |  0.02
+Modify  | 0.048737   | 0.050198   | 0.051231   |   0.4 | 18.51
+Other   |            | 0.007751   |            |       |  2.86
+
+Nlocal:    8000 ave 8030 max 7974 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Nghost:    4177 ave 4191 max 4160 min
+Histogram: 1 0 0 0 1 0 0 1 0 1
+Neighs:    38995.8 ave 39169 max 38852 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 155983
+Ave neighs/atom = 4.87447
+Ave special neighs/atom = 1.98
+Neighbor list builds = 25
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/bench/log.15Feb16.chain.scaled.icc.4 b/bench/log.15Feb16.chain.scaled.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..df541d404ce816d8b225ada75ff62ed542fbcd05
--- /dev/null
+++ b/bench/log.15Feb16.chain.scaled.icc.4
@@ -0,0 +1,94 @@
+LAMMPS (15 Feb 2016)
+# FENE beadspring benchmark
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+units		lj
+atom_style	bond
+atom_modify	map hash
+special_bonds   fene
+
+read_data	data.chain
+  orthogonal box = (-16.796 -16.796 -16.796) to (16.796 16.796 16.796)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  1 = max bonds/atom
+  reading bonds ...
+  31680 bonds
+  2 = max # of 1-2 neighbors
+  2 = max # of special neighbors
+
+replicate	$x $y $z
+replicate	2 $y $z
+replicate	2 2 $z
+replicate	2 2 1
+  orthogonal box = (-16.796 -16.796 -16.796) to (50.388 50.388 16.796)
+  2 by 2 by 1 MPI processor grid
+  128000 atoms
+  126720 bonds
+  2 = max # of 1-2 neighbors
+  2 = max # of special neighbors
+
+neighbor	0.4 bin
+neigh_modify	every 1 delay 1
+
+bond_style      fene
+bond_coeff	1 30.0 1.5 1.0 1.0
+
+pair_style	lj/cut 1.12
+pair_modify	shift yes
+pair_coeff	1 1 1.0 1.0 1.12
+
+fix		1 all nve
+fix		2 all langevin 1.0 1.0 10.0 904297
+
+thermo          100
+timestep	0.012
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 1 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.52
+  ghost atom cutoff = 1.52
+  binsize = 0.76 -> bins = 89 89 45
+Memory usage per processor = 12.8735 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0   0.97027498   0.44484087    20.494523    22.394765    4.6721833 
+     100   0.97682955   0.44239968    20.500229    22.407862    4.6527025 
+Loop time of 1.20889 on 4 procs for 100 steps with 128000 atoms
+
+Performance: 85764.410 tau/day, 82.720 timesteps/s
+99.8% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.21738    | 0.23306    | 0.23926    |   1.9 | 19.28
+Bond    | 0.094536   | 0.10196    | 0.10534    |   1.4 |  8.43
+Neigh   | 0.52311    | 0.52392    | 0.52519    |   0.1 | 43.34
+Comm    | 0.090161   | 0.10022    | 0.12557    |   4.7 |  8.29
+Output  | 0.00012207 | 0.00017327 | 0.00019598 |   0.2 |  0.01
+Modify  | 0.19662    | 0.20262    | 0.20672    |   0.8 | 16.76
+Other   |            | 0.04694    |            |       |  3.88
+
+Nlocal:    32000 ave 32015 max 31983 min
+Histogram: 1 0 1 0 0 0 0 0 1 1
+Nghost:    9492 ave 9522 max 9432 min
+Histogram: 1 0 0 0 0 0 1 0 0 2
+Neighs:    155837 ave 156079 max 155506 min
+Histogram: 1 0 0 0 0 1 0 0 1 1
+
+Total # of neighbors = 623349
+Ave neighs/atom = 4.86991
+Ave special neighs/atom = 1.98
+Neighbor list builds = 25
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/bench/log.15Feb16.chute.fixed.icc.1 b/bench/log.15Feb16.chute.fixed.icc.1
new file mode 100644
index 0000000000000000000000000000000000000000..e256524152f4c7c02f6781c566c75f3f8e226a52
--- /dev/null
+++ b/bench/log.15Feb16.chute.fixed.icc.1
@@ -0,0 +1,80 @@
+LAMMPS (15 Feb 2016)
+# LAMMPS benchmark of granular flow
+# chute flow of 32000 atoms with frozen base at 26 degrees
+
+units		lj
+atom_style	sphere
+boundary	p p fs
+newton		off
+comm_modify	vel yes
+
+read_data	data.chute
+  orthogonal box = (0 0 0) to (40 20 37.2886)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+
+pair_style	gran/hooke/history 200000.0 NULL 50.0 NULL 0.5 0
+pair_coeff	* *
+
+neighbor	0.1 bin
+neigh_modify	every 1 delay 0
+
+timestep	0.0001
+
+group		bottom type 2
+912 atoms in group bottom
+group		active subtract all bottom
+31088 atoms in group active
+neigh_modify	exclude group bottom bottom
+
+fix		1 all gravity 1.0 chute 26.0
+fix		2 bottom freeze
+fix		3 active nve/sphere
+
+compute		1 all erotate/sphere
+thermo_style	custom step atoms ke c_1 vol
+thermo_modify	norm no
+thermo		100
+
+run		100
+Neighbor list info ...
+  2 neighbor list requests
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.1
+  ghost atom cutoff = 1.1
+  binsize = 0.55 -> bins = 73 37 68
+Memory usage per processor = 15.567 Mbytes
+Step Atoms KinEng 1 Volume 
+       0    32000    784139.13    1601.1263    29833.783 
+     100    32000    784292.08    1571.0968    29834.707 
+Loop time of 0.550482 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 1569.534 tau/day, 181.659 timesteps/s
+100.1% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.33849    | 0.33849    | 0.33849    |   0.0 | 61.49
+Neigh   | 0.040353   | 0.040353   | 0.040353   |   0.0 |  7.33
+Comm    | 0.018023   | 0.018023   | 0.018023   |   0.0 |  3.27
+Output  | 0.00020385 | 0.00020385 | 0.00020385 |   0.0 |  0.04
+Modify  | 0.13155    | 0.13155    | 0.13155    |   0.0 | 23.90
+Other   |            | 0.02186    |            |       |  3.97
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    5463 ave 5463 max 5463 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    115133 ave 115133 max 115133 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 115133
+Ave neighs/atom = 3.59791
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/bench/log.15Feb16.chute.fixed.icc.4 b/bench/log.15Feb16.chute.fixed.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..8252408833fc32372a068751c7c440fc230a214f
--- /dev/null
+++ b/bench/log.15Feb16.chute.fixed.icc.4
@@ -0,0 +1,80 @@
+LAMMPS (15 Feb 2016)
+# LAMMPS benchmark of granular flow
+# chute flow of 32000 atoms with frozen base at 26 degrees
+
+units		lj
+atom_style	sphere
+boundary	p p fs
+newton		off
+comm_modify	vel yes
+
+read_data	data.chute
+  orthogonal box = (0 0 0) to (40 20 37.2886)
+  2 by 1 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+
+pair_style	gran/hooke/history 200000.0 NULL 50.0 NULL 0.5 0
+pair_coeff	* *
+
+neighbor	0.1 bin
+neigh_modify	every 1 delay 0
+
+timestep	0.0001
+
+group		bottom type 2
+912 atoms in group bottom
+group		active subtract all bottom
+31088 atoms in group active
+neigh_modify	exclude group bottom bottom
+
+fix		1 all gravity 1.0 chute 26.0
+fix		2 bottom freeze
+fix		3 active nve/sphere
+
+compute		1 all erotate/sphere
+thermo_style	custom step atoms ke c_1 vol
+thermo_modify	norm no
+thermo		100
+
+run		100
+Neighbor list info ...
+  2 neighbor list requests
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.1
+  ghost atom cutoff = 1.1
+  binsize = 0.55 -> bins = 73 37 68
+Memory usage per processor = 6.81783 Mbytes
+Step Atoms KinEng 1 Volume 
+       0    32000    784139.13    1601.1263    29833.783 
+     100    32000    784292.08    1571.0968    29834.707 
+Loop time of 0.13141 on 4 procs for 100 steps with 32000 atoms
+
+Performance: 6574.833 tau/day, 760.976 timesteps/s
+99.3% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.062505   | 0.067      | 0.07152    |   1.5 | 50.99
+Neigh   | 0.010041   | 0.0101     | 0.010178   |   0.1 |  7.69
+Comm    | 0.012347   | 0.012895   | 0.013444   |   0.5 |  9.81
+Output  | 6.3896e-05 | 0.00010294 | 0.00014091 |   0.3 |  0.08
+Modify  | 0.031802   | 0.032348   | 0.032897   |   0.3 | 24.62
+Other   |            | 0.008965   |            |       |  6.82
+
+Nlocal:    8000 ave 8008 max 7992 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Nghost:    2439 ave 2450 max 2428 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Neighs:    29500.5 ave 30488 max 28513 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 118002
+Ave neighs/atom = 3.68756
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/bench/log.15Feb16.chute.scaled.icc.4 b/bench/log.15Feb16.chute.scaled.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..0b9807a4477f1c14f94680d0c740434dc90542f0
--- /dev/null
+++ b/bench/log.15Feb16.chute.scaled.icc.4
@@ -0,0 +1,90 @@
+LAMMPS (15 Feb 2016)
+# LAMMPS benchmark of granular flow
+# chute flow of 32000 atoms with frozen base at 26 degrees
+
+variable	x index 1
+variable	y index 1
+
+units		lj
+atom_style	sphere
+boundary	p p fs
+newton		off
+comm_modify	vel yes
+
+read_data	data.chute
+  orthogonal box = (0 0 0) to (40 20 37.2886)
+  2 by 1 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+
+replicate	$x $y 1
+replicate	2 $y 1
+replicate	2 2 1
+  orthogonal box = (0 0 0) to (80 40 37.2922)
+  2 by 2 by 1 MPI processor grid
+  128000 atoms
+
+pair_style	gran/hooke/history 200000.0 NULL 50.0 NULL 0.5 0
+pair_coeff	* *
+
+neighbor	0.1 bin
+neigh_modify	every 1 delay 0
+
+timestep	0.0001
+
+group		bottom type 2
+3648 atoms in group bottom
+group		active subtract all bottom
+124352 atoms in group active
+neigh_modify	exclude group bottom bottom
+
+fix		1 all gravity 1.0 chute 26.0
+fix		2 bottom freeze
+fix		3 active nve/sphere
+
+compute		1 all erotate/sphere
+thermo_style	custom step atoms ke c_1 vol
+thermo_modify	norm no
+thermo		100
+
+run		100
+Neighbor list info ...
+  2 neighbor list requests
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.1
+  ghost atom cutoff = 1.1
+  binsize = 0.55 -> bins = 146 73 68
+Memory usage per processor = 15.7007 Mbytes
+Step Atoms KinEng 1 Volume 
+       0   128000    3136556.5    6404.5051    119335.13 
+     100   128000    3137168.3    6284.3873    119338.83 
+Loop time of 0.906913 on 4 procs for 100 steps with 128000 atoms
+
+Performance: 952.683 tau/day, 110.264 timesteps/s
+99.7% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.51454    | 0.53094    | 0.55381    |   2.0 | 58.54
+Neigh   | 0.042597   | 0.043726   | 0.045801   |   0.6 |  4.82
+Comm    | 0.063027   | 0.064657   | 0.067367   |   0.7 |  7.13
+Output  | 0.00024891 | 0.00059718 | 0.00086498 |   1.0 |  0.07
+Modify  | 0.16508    | 0.17656    | 0.1925     |   2.6 | 19.47
+Other   |            | 0.09043    |            |       |  9.97
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    5463 ave 5463 max 5463 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    115133 ave 115133 max 115133 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 460532
+Ave neighs/atom = 3.59791
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/bench/log.15Feb16.eam.fixed.icc.1 b/bench/log.15Feb16.eam.fixed.icc.1
new file mode 100644
index 0000000000000000000000000000000000000000..0408d2c8d71defc739ae1268cf5611ec80e1f092
--- /dev/null
+++ b/bench/log.15Feb16.eam.fixed.icc.1
@@ -0,0 +1,83 @@
+LAMMPS (15 Feb 2016)
+# bulk Cu lattice
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*1
+variable	yy equal 20*$y
+variable	yy equal 20*1
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		metal
+atom_style	atomic
+
+lattice		fcc 3.615
+Lattice spacing in x,y,z = 3.615 3.615 3.615
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 20 0 ${yy} 0 ${zz}
+region		box block 0 20 0 20 0 ${zz}
+region		box block 0 20 0 20 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (72.3 72.3 72.3)
+  1 by 1 by 1 MPI processor grid
+create_atoms	1 box
+Created 32000 atoms
+
+pair_style	eam
+pair_coeff	1 1 Cu_u3.eam
+Reading potential file Cu_u3.eam with DATE: 2007-06-11
+
+velocity	all create 1600.0 376847 loop geom
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 5 check yes
+
+fix		1 all nve
+
+timestep	0.005
+thermo		50
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.95
+  ghost atom cutoff = 5.95
+  binsize = 2.975 -> bins = 25 25 25
+Memory usage per processor = 10.2238 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1600      -113280            0   -106662.09    18703.573 
+      50    781.69049   -109873.35            0   -106640.13    52273.088 
+     100      801.832    -109957.3            0   -106640.77    51322.821 
+Loop time of 5.90097 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 7.321 ns/day, 3.278 hours/ns, 16.946 timesteps/s
+99.9% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.2121     | 5.2121     | 5.2121     |   0.0 | 88.33
+Neigh   | 0.58212    | 0.58212    | 0.58212    |   0.0 |  9.86
+Comm    | 0.030392   | 0.030392   | 0.030392   |   0.0 |  0.52
+Output  | 0.00023389 | 0.00023389 | 0.00023389 |   0.0 |  0.00
+Modify  | 0.060871   | 0.060871   | 0.060871   |   0.0 |  1.03
+Other   |            | 0.01527    |            |       |  0.26
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    19909 ave 19909 max 19909 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    1.20778e+06 ave 1.20778e+06 max 1.20778e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1207784
+Ave neighs/atom = 37.7433
+Neighbor list builds = 13
+Dangerous builds = 0
+Total wall time: 0:00:06
diff --git a/bench/log.15Feb16.eam.fixed.icc.4 b/bench/log.15Feb16.eam.fixed.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..6a7c47a58b3fc4cf0076387b89bc685ac6138e66
--- /dev/null
+++ b/bench/log.15Feb16.eam.fixed.icc.4
@@ -0,0 +1,83 @@
+LAMMPS (15 Feb 2016)
+# bulk Cu lattice
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*1
+variable	yy equal 20*$y
+variable	yy equal 20*1
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		metal
+atom_style	atomic
+
+lattice		fcc 3.615
+Lattice spacing in x,y,z = 3.615 3.615 3.615
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 20 0 ${yy} 0 ${zz}
+region		box block 0 20 0 20 0 ${zz}
+region		box block 0 20 0 20 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (72.3 72.3 72.3)
+  1 by 2 by 2 MPI processor grid
+create_atoms	1 box
+Created 32000 atoms
+
+pair_style	eam
+pair_coeff	1 1 Cu_u3.eam
+Reading potential file Cu_u3.eam with DATE: 2007-06-11
+
+velocity	all create 1600.0 376847 loop geom
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 5 check yes
+
+fix		1 all nve
+
+timestep	0.005
+thermo		50
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.95
+  ghost atom cutoff = 5.95
+  binsize = 2.975 -> bins = 25 25 25
+Memory usage per processor = 5.09629 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1600      -113280            0   -106662.09    18703.573 
+      50    781.69049   -109873.35            0   -106640.13    52273.088 
+     100      801.832    -109957.3            0   -106640.77    51322.821 
+Loop time of 1.58019 on 4 procs for 100 steps with 32000 atoms
+
+Performance: 27.338 ns/day, 0.878 hours/ns, 63.284 timesteps/s
+99.8% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.3617     | 1.366      | 1.3723     |   0.4 | 86.45
+Neigh   | 0.15123    | 0.15232    | 0.15374    |   0.2 |  9.64
+Comm    | 0.033429   | 0.041275   | 0.047066   |   2.7 |  2.61
+Output  | 0.00011301 | 0.0001573  | 0.000211   |   0.3 |  0.01
+Modify  | 0.014694   | 0.015085   | 0.015421   |   0.2 |  0.95
+Other   |            | 0.005342   |            |       |  0.34
+
+Nlocal:    8000 ave 8008 max 7993 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+Nghost:    9130.25 ave 9138 max 9122 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Neighs:    301946 ave 302392 max 301360 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+
+Total # of neighbors = 1207784
+Ave neighs/atom = 37.7433
+Neighbor list builds = 13
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/bench/log.15Feb16.eam.scaled.icc.4 b/bench/log.15Feb16.eam.scaled.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..2c1cd3273ee6c31c6e621fd36ee78c5de635ac18
--- /dev/null
+++ b/bench/log.15Feb16.eam.scaled.icc.4
@@ -0,0 +1,83 @@
+LAMMPS (15 Feb 2016)
+# bulk Cu lattice
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*2
+variable	yy equal 20*$y
+variable	yy equal 20*2
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		metal
+atom_style	atomic
+
+lattice		fcc 3.615
+Lattice spacing in x,y,z = 3.615 3.615 3.615
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 40 0 ${yy} 0 ${zz}
+region		box block 0 40 0 40 0 ${zz}
+region		box block 0 40 0 40 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (144.6 144.6 72.3)
+  2 by 2 by 1 MPI processor grid
+create_atoms	1 box
+Created 128000 atoms
+
+pair_style	eam
+pair_coeff	1 1 Cu_u3.eam
+Reading potential file Cu_u3.eam with DATE: 2007-06-11
+
+velocity	all create 1600.0 376847 loop geom
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 5 check yes
+
+fix		1 all nve
+
+timestep	0.005
+thermo		50
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.95
+  ghost atom cutoff = 5.95
+  binsize = 2.975 -> bins = 49 49 25
+Memory usage per processor = 10.1402 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1600      -453120            0   -426647.73    18704.012 
+      50    779.50001   -439457.02            0   -426560.06    52355.276 
+     100    797.97828   -439764.76            0   -426562.07     51474.74 
+Loop time of 6.46849 on 4 procs for 100 steps with 128000 atoms
+
+Performance: 6.679 ns/day, 3.594 hours/ns, 15.460 timesteps/s
+99.9% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.581      | 5.5997     | 5.6265     |   0.8 | 86.57
+Neigh   | 0.65287    | 0.658      | 0.66374    |   0.5 | 10.17
+Comm    | 0.075706   | 0.11015    | 0.13655    |   7.2 |  1.70
+Output  | 0.00026488 | 0.00028312 | 0.00029302 |   0.1 |  0.00
+Modify  | 0.069607   | 0.072407   | 0.074555   |   0.7 |  1.12
+Other   |            | 0.02794    |            |       |  0.43
+
+Nlocal:    32000 ave 32092 max 31914 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Nghost:    19910 ave 19997 max 19818 min
+Histogram: 1 0 0 0 1 0 1 0 0 1
+Neighs:    1.20728e+06 ave 1.21142e+06 max 1.2036e+06 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 4829126
+Ave neighs/atom = 37.7275
+Neighbor list builds = 14
+Dangerous builds = 0
+Total wall time: 0:00:06
diff --git a/bench/log.15Feb16.lj.fixed.icc.1 b/bench/log.15Feb16.lj.fixed.icc.1
new file mode 100644
index 0000000000000000000000000000000000000000..e889fba2258b57b6d2a0140434c178c8156aa6f9
--- /dev/null
+++ b/bench/log.15Feb16.lj.fixed.icc.1
@@ -0,0 +1,79 @@
+LAMMPS (15 Feb 2016)
+# 3d Lennard-Jones melt
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*1
+variable	yy equal 20*$y
+variable	yy equal 20*1
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 20 0 ${yy} 0 ${zz}
+region		box block 0 20 0 20 0 ${zz}
+region		box block 0 20 0 20 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919)
+  1 by 1 by 1 MPI processor grid
+create_atoms	1 box
+Created 32000 atoms
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 20 steps, delay 0 steps, check no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4 -> bins = 24 24 24
+Memory usage per processor = 8.21387 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1.44   -6.7733681            0   -4.6134356   -5.0197073 
+     100    0.7574531   -5.7585055            0   -4.6223613   0.20726105 
+Loop time of 2.26309 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 19088.920 tau/day, 44.187 timesteps/s
+99.9% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.9341     | 1.9341     | 1.9341     |   0.0 | 85.46
+Neigh   | 0.2442     | 0.2442     | 0.2442     |   0.0 | 10.79
+Comm    | 0.024158   | 0.024158   | 0.024158   |   0.0 |  1.07
+Output  | 0.00011611 | 0.00011611 | 0.00011611 |   0.0 |  0.01
+Modify  | 0.053222   | 0.053222   | 0.053222   |   0.0 |  2.35
+Other   |            | 0.007258   |            |       |  0.32
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    19657 ave 19657 max 19657 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    1.20283e+06 ave 1.20283e+06 max 1.20283e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1202833
+Ave neighs/atom = 37.5885
+Neighbor list builds = 5
+Dangerous builds not checked
+Total wall time: 0:00:02
diff --git a/bench/log.15Feb16.lj.fixed.icc.4 b/bench/log.15Feb16.lj.fixed.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..53cdcdbee6f3186ee7a86a75e12ea01c839b9c76
--- /dev/null
+++ b/bench/log.15Feb16.lj.fixed.icc.4
@@ -0,0 +1,79 @@
+LAMMPS (15 Feb 2016)
+# 3d Lennard-Jones melt
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*1
+variable	yy equal 20*$y
+variable	yy equal 20*1
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 20 0 ${yy} 0 ${zz}
+region		box block 0 20 0 20 0 ${zz}
+region		box block 0 20 0 20 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919)
+  1 by 2 by 2 MPI processor grid
+create_atoms	1 box
+Created 32000 atoms
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 20 steps, delay 0 steps, check no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4 -> bins = 24 24 24
+Memory usage per processor = 4.09506 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1.44   -6.7733681            0   -4.6134356   -5.0197073 
+     100    0.7574531   -5.7585055            0   -4.6223613   0.20726105 
+Loop time of 0.640733 on 4 procs for 100 steps with 32000 atoms
+
+Performance: 67422.779 tau/day, 156.071 timesteps/s
+99.7% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.49487    | 0.51733    | 0.5322     |   1.9 | 80.74
+Neigh   | 0.061131   | 0.063685   | 0.065433   |   0.6 |  9.94
+Comm    | 0.02457    | 0.042349   | 0.069598   |   8.1 |  6.61
+Output  | 5.9843e-05 | 6.3181e-05 | 6.6996e-05 |   0.0 |  0.01
+Modify  | 0.012961   | 0.013863   | 0.014491   |   0.5 |  2.16
+Other   |            | 0.003448   |            |       |  0.54
+
+Nlocal:    8000 ave 8037 max 7964 min
+Histogram: 2 0 0 0 0 0 0 0 1 1
+Nghost:    9007.5 ave 9050 max 8968 min
+Histogram: 1 1 0 0 0 0 0 1 0 1
+Neighs:    300708 ave 305113 max 297203 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 1202833
+Ave neighs/atom = 37.5885
+Neighbor list builds = 5
+Dangerous builds not checked
+Total wall time: 0:00:00
diff --git a/bench/log.15Feb16.lj.scaled.icc.4 b/bench/log.15Feb16.lj.scaled.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..7676040c5481c5e29de40da4b781524591231ebe
--- /dev/null
+++ b/bench/log.15Feb16.lj.scaled.icc.4
@@ -0,0 +1,79 @@
+LAMMPS (15 Feb 2016)
+# 3d Lennard-Jones melt
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+variable	xx equal 20*$x
+variable	xx equal 20*2
+variable	yy equal 20*$y
+variable	yy equal 20*2
+variable	zz equal 20*$z
+variable	zz equal 20*1
+
+units		lj
+atom_style	atomic
+
+lattice		fcc 0.8442
+Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+region		box block 0 40 0 ${yy} 0 ${zz}
+region		box block 0 40 0 40 0 ${zz}
+region		box block 0 40 0 40 0 20
+create_box	1 box
+Created orthogonal box = (0 0 0) to (67.1838 67.1838 33.5919)
+  2 by 2 by 1 MPI processor grid
+create_atoms	1 box
+Created 128000 atoms
+mass		1 1.0
+
+velocity	all create 1.44 87287 loop geom
+
+pair_style	lj/cut 2.5
+pair_coeff	1 1 1.0 1.0 2.5
+
+neighbor	0.3 bin
+neigh_modify	delay 0 every 20 check no
+
+fix		1 all nve
+
+run		100
+Neighbor list info ...
+  1 neighbor list requests
+  update every 20 steps, delay 0 steps, check no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4 -> bins = 48 48 24
+Memory usage per processor = 8.13678 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0         1.44   -6.7733681            0   -4.6133849   -5.0196788 
+     100   0.75841891    -5.759957            0   -4.6223375   0.20008866 
+Loop time of 2.57914 on 4 procs for 100 steps with 128000 atoms
+
+Performance: 16749.768 tau/day, 38.773 timesteps/s
+99.8% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.042      | 2.1092     | 2.1668     |   3.1 | 81.78
+Neigh   | 0.23982    | 0.24551    | 0.25233    |   1.0 |  9.52
+Comm    | 0.067088   | 0.13887    | 0.22681    |  15.7 |  5.38
+Output  | 0.00013185 | 0.00021666 | 0.00027108 |   0.4 |  0.01
+Modify  | 0.060348   | 0.071269   | 0.077063   |   2.5 |  2.76
+Other   |            | 0.01403    |            |       |  0.54
+
+Nlocal:    32000 ave 32060 max 31939 min
+Histogram: 1 0 1 0 0 0 0 1 0 1
+Nghost:    19630.8 ave 19681 max 19562 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+Neighs:    1.20195e+06 ave 1.20354e+06 max 1.19931e+06 min
+Histogram: 1 0 0 0 0 0 0 2 0 1
+
+Total # of neighbors = 4807797
+Ave neighs/atom = 37.5609
+Neighbor list builds = 5
+Dangerous builds not checked
+Total wall time: 0:00:02
diff --git a/bench/log.15Feb16.rhodo.fixed.icc.1 b/bench/log.15Feb16.rhodo.fixed.icc.1
new file mode 100644
index 0000000000000000000000000000000000000000..994c69e585f04edd5d3fb36bb19005b6491c22af
--- /dev/null
+++ b/bench/log.15Feb16.rhodo.fixed.icc.1
@@ -0,0 +1,121 @@
+LAMMPS (15 Feb 2016)
+# Rhodopsin model
+
+units           real
+neigh_modify    delay 5 every 1
+
+atom_style      full
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmm
+improper_style  harmonic
+pair_style      lj/charmm/coul/long 8.0 10.0
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-4
+
+read_data       data.rhodo
+  orthogonal box = (-27.5 -38.5 -36.3646) to (27.5 38.5 36.3615)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  8 = max angles/atom
+  scanning dihedrals ...
+  18 = max dihedrals/atom
+  scanning impropers ...
+  2 = max impropers/atom
+  reading bonds ...
+  27723 bonds
+  reading angles ...
+  40467 angles
+  reading dihedrals ...
+  56829 dihedrals
+  reading impropers ...
+  1034 impropers
+  4 = max # of 1-2 neighbors
+  12 = max # of 1-3 neighbors
+  24 = max # of 1-4 neighbors
+  26 = max # of special neighbors
+
+fix             1 all shake 0.0001 5 0 m 1.0 a 232
+  1617 = # of size 2 clusters
+  3633 = # of size 3 clusters
+  747 = # of size 4 clusters
+  4233 = # of frozen angles
+fix             2 all npt temp 300.0 300.0 100.0 		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
+
+special_bonds   charmm
+
+thermo          50
+thermo_style    multi
+timestep        2.0
+
+run		100
+PPPM initialization ...
+  G vector (1/distance) = 0.248835
+  grid = 25 32 32
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0355478
+  estimated relative force accuracy = 0.000107051
+  using double precision FFTs
+  3d grid and FFT values/proc = 41070 25600
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 12
+  ghost atom cutoff = 12
+  binsize = 6 -> bins = 10 13 13
+Memory usage per processor = 91.7487 Mbytes
+---------------- Step        0 ----- CPU =      0.0000 (sec) ----------------
+TotEng   =    -25356.2064 KinEng   =     21444.8313 Temp     =       299.0397 
+PotEng   =    -46801.0377 E_bond   =      2537.9940 E_angle  =     10921.3742 
+E_dihed  =      5211.7865 E_impro  =       213.5116 E_vdwl   =     -2307.8634 
+E_coul   =    207025.8927 E_long   =   -270403.7333 Press    =      -142.6035 
+Volume   =    307995.0335 
+---------------- Step       50 ----- CPU =     17.6362 (sec) ----------------
+TotEng   =    -25330.0828 KinEng   =     21501.0029 Temp     =       299.8230 
+PotEng   =    -46831.0857 E_bond   =      2471.7004 E_angle  =     10836.4975 
+E_dihed  =      5239.6299 E_impro  =       227.1218 E_vdwl   =     -1993.2754 
+E_coul   =    206797.6331 E_long   =   -270410.3930 Press    =       237.6701 
+Volume   =    308031.5639 
+---------------- Step      100 ----- CPU =     35.9089 (sec) ----------------
+TotEng   =    -25290.7593 KinEng   =     21592.0117 Temp     =       301.0920 
+PotEng   =    -46882.7709 E_bond   =      2567.9807 E_angle  =     10781.9408 
+E_dihed  =      5198.7432 E_impro  =       216.7834 E_vdwl   =     -1902.4783 
+E_coul   =    206659.2326 E_long   =   -270404.9733 Press    =         6.9960 
+Volume   =    308133.9888 
+Loop time of 35.9089 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 0.481 ns/day, 49.874 hours/ns, 2.785 timesteps/s
+99.9% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 25.731     | 25.731     | 25.731     |   0.0 | 71.66
+Bond    | 1.2771     | 1.2771     | 1.2771     |   0.0 |  3.56
+Kspace  | 3.2094     | 3.2094     | 3.2094     |   0.0 |  8.94
+Neigh   | 4.4538     | 4.4538     | 4.4538     |   0.0 | 12.40
+Comm    | 0.068507   | 0.068507   | 0.068507   |   0.0 |  0.19
+Output  | 0.00025916 | 0.00025916 | 0.00025916 |   0.0 |  0.00
+Modify  | 1.1417     | 1.1417     | 1.1417     |   0.0 |  3.18
+Other   |            | 0.027      |            |       |  0.08
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    47958 ave 47958 max 47958 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    1.20281e+07 ave 1.20281e+07 max 1.20281e+07 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 12028107
+Ave neighs/atom = 375.878
+Ave special neighs/atom = 7.43187
+Neighbor list builds = 11
+Dangerous builds = 0
+Total wall time: 0:00:37
diff --git a/bench/log.15Feb16.rhodo.fixed.icc.4 b/bench/log.15Feb16.rhodo.fixed.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..dfa5b065f917dd08a3f19d1cb2f188eae64e2fa7
--- /dev/null
+++ b/bench/log.15Feb16.rhodo.fixed.icc.4
@@ -0,0 +1,121 @@
+LAMMPS (15 Feb 2016)
+# Rhodopsin model
+
+units           real
+neigh_modify    delay 5 every 1
+
+atom_style      full
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmm
+improper_style  harmonic
+pair_style      lj/charmm/coul/long 8.0 10.0
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-4
+
+read_data       data.rhodo
+  orthogonal box = (-27.5 -38.5 -36.3646) to (27.5 38.5 36.3615)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  8 = max angles/atom
+  scanning dihedrals ...
+  18 = max dihedrals/atom
+  scanning impropers ...
+  2 = max impropers/atom
+  reading bonds ...
+  27723 bonds
+  reading angles ...
+  40467 angles
+  reading dihedrals ...
+  56829 dihedrals
+  reading impropers ...
+  1034 impropers
+  4 = max # of 1-2 neighbors
+  12 = max # of 1-3 neighbors
+  24 = max # of 1-4 neighbors
+  26 = max # of special neighbors
+
+fix             1 all shake 0.0001 5 0 m 1.0 a 232
+  1617 = # of size 2 clusters
+  3633 = # of size 3 clusters
+  747 = # of size 4 clusters
+  4233 = # of frozen angles
+fix             2 all npt temp 300.0 300.0 100.0 		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
+
+special_bonds   charmm
+
+thermo          50
+thermo_style    multi
+timestep        2.0
+
+run		100
+PPPM initialization ...
+  G vector (1/distance) = 0.248835
+  grid = 25 32 32
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0355478
+  estimated relative force accuracy = 0.000107051
+  using double precision FFTs
+  3d grid and FFT values/proc = 13230 6400
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 12
+  ghost atom cutoff = 12
+  binsize = 6 -> bins = 10 13 13
+Memory usage per processor = 36.629 Mbytes
+---------------- Step        0 ----- CPU =      0.0000 (sec) ----------------
+TotEng   =    -25356.2064 KinEng   =     21444.8313 Temp     =       299.0397 
+PotEng   =    -46801.0377 E_bond   =      2537.9940 E_angle  =     10921.3742 
+E_dihed  =      5211.7865 E_impro  =       213.5116 E_vdwl   =     -2307.8634 
+E_coul   =    207025.8927 E_long   =   -270403.7333 Press    =      -142.6035 
+Volume   =    307995.0335 
+---------------- Step       50 ----- CPU =      4.7461 (sec) ----------------
+TotEng   =    -25330.0828 KinEng   =     21501.0029 Temp     =       299.8230 
+PotEng   =    -46831.0857 E_bond   =      2471.7004 E_angle  =     10836.4975 
+E_dihed  =      5239.6299 E_impro  =       227.1218 E_vdwl   =     -1993.2754 
+E_coul   =    206797.6331 E_long   =   -270410.3930 Press    =       237.6701 
+Volume   =    308031.5639 
+---------------- Step      100 ----- CPU =      9.6332 (sec) ----------------
+TotEng   =    -25290.7591 KinEng   =     21592.0117 Temp     =       301.0920 
+PotEng   =    -46882.7708 E_bond   =      2567.9807 E_angle  =     10781.9408 
+E_dihed  =      5198.7432 E_impro  =       216.7834 E_vdwl   =     -1902.4783 
+E_coul   =    206659.2327 E_long   =   -270404.9733 Press    =         6.9960 
+Volume   =    308133.9888 
+Loop time of 9.63322 on 4 procs for 100 steps with 32000 atoms
+
+Performance: 1.794 ns/day, 13.379 hours/ns, 10.381 timesteps/s
+99.9% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 6.4364     | 6.5993     | 6.7208     |   4.7 | 68.51
+Bond    | 0.30755    | 0.32435    | 0.35704    |   3.4 |  3.37
+Kspace  | 0.92248    | 1.0782     | 1.2597     |  13.0 | 11.19
+Neigh   | 1.1669     | 1.1672     | 1.1675     |   0.0 | 12.12
+Comm    | 0.094674   | 0.098065   | 0.10543    |   1.4 |  1.02
+Output  | 0.00015521 | 0.00016224 | 0.00018215 |   0.1 |  0.00
+Modify  | 0.32982    | 0.34654    | 0.35365    |   1.6 |  3.60
+Other   |            | 0.01943    |            |       |  0.20
+
+Nlocal:    8000 ave 8143 max 7933 min
+Histogram: 1 2 0 0 0 0 0 0 0 1
+Nghost:    22733.5 ave 22769 max 22693 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Neighs:    3.00703e+06 ave 3.0975e+06 max 2.96493e+06 min
+Histogram: 1 2 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 12028107
+Ave neighs/atom = 375.878
+Ave special neighs/atom = 7.43187
+Neighbor list builds = 11
+Dangerous builds = 0
+Total wall time: 0:00:10
diff --git a/bench/log.15Feb16.rhodo.scaled.icc.4 b/bench/log.15Feb16.rhodo.scaled.icc.4
new file mode 100644
index 0000000000000000000000000000000000000000..73cdb163b00853f4bdaf18666a3b0f2dd7ed0936
--- /dev/null
+++ b/bench/log.15Feb16.rhodo.scaled.icc.4
@@ -0,0 +1,142 @@
+LAMMPS (15 Feb 2016)
+# Rhodopsin model
+
+variable	x index 1
+variable	y index 1
+variable	z index 1
+
+units           real
+neigh_modify    delay 5 every 1
+
+atom_style      full
+atom_modify	map hash
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmm
+improper_style  harmonic
+pair_style      lj/charmm/coul/long 8.0 10.0
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-4
+
+read_data       data.rhodo
+  orthogonal box = (-27.5 -38.5 -36.3646) to (27.5 38.5 36.3615)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  32000 atoms
+  reading velocities ...
+  32000 velocities
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  8 = max angles/atom
+  scanning dihedrals ...
+  18 = max dihedrals/atom
+  scanning impropers ...
+  2 = max impropers/atom
+  reading bonds ...
+  27723 bonds
+  reading angles ...
+  40467 angles
+  reading dihedrals ...
+  56829 dihedrals
+  reading impropers ...
+  1034 impropers
+  4 = max # of 1-2 neighbors
+  12 = max # of 1-3 neighbors
+  24 = max # of 1-4 neighbors
+  26 = max # of special neighbors
+
+replicate	$x $y $z
+replicate	2 $y $z
+replicate	2 2 $z
+replicate	2 2 1
+  orthogonal box = (-27.5 -38.5 -36.3646) to (82.5 115.5 36.3615)
+  2 by 2 by 1 MPI processor grid
+  128000 atoms
+  110892 bonds
+  161868 angles
+  227316 dihedrals
+  4136 impropers
+  4 = max # of 1-2 neighbors
+  12 = max # of 1-3 neighbors
+  24 = max # of 1-4 neighbors
+  26 = max # of special neighbors
+
+fix             1 all shake 0.0001 5 0 m 1.0 a 232
+  6468 = # of size 2 clusters
+  14532 = # of size 3 clusters
+  2988 = # of size 4 clusters
+  16932 = # of frozen angles
+fix             2 all npt temp 300.0 300.0 100.0 		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
+
+special_bonds   charmm
+
+thermo          50
+thermo_style    multi
+timestep        2.0
+
+run		100
+PPPM initialization ...
+  G vector (1/distance) = 0.248593
+  grid = 48 60 36
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0359793
+  estimated relative force accuracy = 0.00010835
+  using double precision FFTs
+  3d grid and FFT values/proc = 41615 25920
+Neighbor list info ...
+  1 neighbor list requests
+  update every 1 steps, delay 5 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 12
+  ghost atom cutoff = 12
+  binsize = 6 -> bins = 19 26 13
+Memory usage per processor = 95.5339 Mbytes
+---------------- Step        0 ----- CPU =      0.0000 (sec) ----------------
+TotEng   =   -101425.4887 KinEng   =     85779.3251 Temp     =       299.0304 
+PotEng   =   -187204.8138 E_bond   =     10151.9760 E_angle  =     43685.4968 
+E_dihed  =     20847.1460 E_impro  =       854.0463 E_vdwl   =     -9231.4537 
+E_coul   =    827053.5824 E_long   =  -1080565.6077 Press    =      -142.3092 
+Volume   =   1231980.1340 
+---------------- Step       50 ----- CPU =     18.7806 (sec) ----------------
+TotEng   =   -101320.2677 KinEng   =     86003.4837 Temp     =       299.8118 
+PotEng   =   -187323.7514 E_bond   =      9887.1072 E_angle  =     43346.7922 
+E_dihed  =     20958.7032 E_impro  =       908.4715 E_vdwl   =     -7973.4457 
+E_coul   =    826141.3831 E_long   =  -1080592.7629 Press    =       238.0161 
+Volume   =   1232126.1855 
+---------------- Step      100 ----- CPU =     38.3684 (sec) ----------------
+TotEng   =   -101158.1849 KinEng   =     86355.6149 Temp     =       301.0393 
+PotEng   =   -187513.7998 E_bond   =     10272.0693 E_angle  =     43128.6454 
+E_dihed  =     20793.9759 E_impro  =       867.0826 E_vdwl   =     -7586.7186 
+E_coul   =    825583.7122 E_long   =  -1080572.5667 Press    =        15.2151 
+Volume   =   1232535.8423 
+Loop time of 38.3684 on 4 procs for 100 steps with 128000 atoms
+
+Performance: 0.450 ns/day, 53.289 hours/ns, 2.606 timesteps/s
+99.9% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 26.205     | 26.538     | 26.911     |   5.0 | 69.17
+Bond    | 1.298      | 1.3125     | 1.3277     |   1.0 |  3.42
+Kspace  | 3.7099     | 4.0992     | 4.4422     |  13.3 | 10.68
+Neigh   | 4.6137     | 4.6144     | 4.615      |   0.0 | 12.03
+Comm    | 0.21398    | 0.21992    | 0.22886    |   1.2 |  0.57
+Output  | 0.00030518 | 0.00031543 | 0.00033307 |   0.1 |  0.00
+Modify  | 1.5066     | 1.5232     | 1.5388     |   1.0 |  3.97
+Other   |            | 0.06051    |            |       |  0.16
+
+Nlocal:    32000 ave 32000 max 32000 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    47957 ave 47957 max 47957 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    1.20281e+07 ave 1.20572e+07 max 1.1999e+07 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 48112472
+Ave neighs/atom = 375.879
+Ave special neighs/atom = 7.43187
+Neighbor list builds = 11
+Dangerous builds = 0
+Total wall time: 0:00:39