From ce59e32d1cf621ff59da11f7eaaa34aef10f36ae Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Fri, 28 Aug 2015 20:00:56 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13937
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/GPU/fix_gpu.cpp                           |   2 +-
 src/KOKKOS/verlet_kokkos.cpp                  |  51 +-
 src/KSPACE/fix_tune_kspace.cpp                |   4 +-
 src/REPLICA/neb.cpp                           |   8 +-
 src/REPLICA/prd.cpp                           |  84 ++--
 src/REPLICA/tad.cpp                           |  92 ++--
 src/REPLICA/temper.cpp                        |   4 +-
 src/REPLICA/verlet_split.cpp                  |  26 +-
 src/USER-CUDA/verlet_cuda.cpp                 |  36 +-
 src/USER-FEP/compute_fep.cpp                  |   8 +-
 src/USER-INTEL/verlet_intel.cpp               |  41 +-
 src/USER-INTEL/verlet_split_intel.cpp         |  30 +-
 src/USER-OMP/angle_charmm_omp.cpp             |   2 +
 src/USER-OMP/angle_class2_omp.cpp             |   2 +
 src/USER-OMP/angle_cosine_delta_omp.cpp       |   2 +
 src/USER-OMP/angle_cosine_omp.cpp             |   2 +
 src/USER-OMP/angle_cosine_periodic_omp.cpp    |   2 +
 src/USER-OMP/angle_cosine_shift_exp_omp.cpp   |   2 +
 src/USER-OMP/angle_cosine_shift_omp.cpp       |   2 +
 src/USER-OMP/angle_cosine_squared_omp.cpp     |   2 +
 src/USER-OMP/angle_dipole_omp.cpp             |   2 +
 src/USER-OMP/angle_fourier_omp.cpp            |   2 +
 src/USER-OMP/angle_fourier_simple_omp.cpp     |   2 +
 src/USER-OMP/angle_harmonic_omp.cpp           |   2 +
 src/USER-OMP/angle_quartic_omp.cpp            |   2 +
 src/USER-OMP/angle_sdk_omp.cpp                |   2 +
 src/USER-OMP/angle_table_omp.cpp              |   2 +
 src/USER-OMP/bond_class2_omp.cpp              |   2 +
 src/USER-OMP/bond_fene_expand_omp.cpp         |   2 +
 src/USER-OMP/bond_fene_omp.cpp                |   2 +
 src/USER-OMP/bond_harmonic_omp.cpp            |   2 +
 src/USER-OMP/bond_harmonic_shift_cut_omp.cpp  |   2 +
 src/USER-OMP/bond_harmonic_shift_omp.cpp      |   2 +
 src/USER-OMP/bond_morse_omp.cpp               |   2 +
 src/USER-OMP/bond_nonlinear_omp.cpp           |   2 +
 src/USER-OMP/bond_quartic_omp.cpp             |   2 +
 src/USER-OMP/bond_table_omp.cpp               |   2 +
 src/USER-OMP/dihedral_charmm_omp.cpp          |   2 +
 src/USER-OMP/dihedral_class2_omp.cpp          |   2 +
 .../dihedral_cosine_shift_exp_omp.cpp         |   2 +
 src/USER-OMP/dihedral_fourier_omp.cpp         |   2 +
 src/USER-OMP/dihedral_harmonic_omp.cpp        |   2 +
 src/USER-OMP/dihedral_helix_omp.cpp           |   2 +
 src/USER-OMP/dihedral_multi_harmonic_omp.cpp  |   2 +
 src/USER-OMP/dihedral_nharmonic_omp.cpp       |   2 +
 src/USER-OMP/dihedral_opls_omp.cpp            |   2 +
 src/USER-OMP/dihedral_quadratic_omp.cpp       |   2 +
 src/USER-OMP/dihedral_table_omp.cpp           |   2 +
 src/USER-OMP/ewald_omp.cpp                    |   2 +
 src/USER-OMP/fix_omp.cpp                      |  31 +-
 src/USER-OMP/fix_omp.h                        |   2 +
 src/USER-OMP/improper_class2_omp.cpp          |   2 +
 src/USER-OMP/improper_cossq_omp.cpp           |   2 +
 src/USER-OMP/improper_cvff_omp.cpp            |   2 +
 src/USER-OMP/improper_fourier_omp.cpp         |  14 +-
 src/USER-OMP/improper_harmonic_omp.cpp        |   2 +
 src/USER-OMP/improper_ring_omp.cpp            |   2 +
 src/USER-OMP/improper_umbrella_omp.cpp        |   2 +
 src/USER-OMP/msm_cg_omp.cpp                   |   1 +
 src/USER-OMP/msm_omp.cpp                      |   8 +-
 src/USER-OMP/neighbor_omp.h                   |  22 +-
 src/USER-OMP/pair_adp_omp.cpp                 |   4 +
 src/USER-OMP/pair_airebo_omp.cpp              |   2 +
 src/USER-OMP/pair_beck_omp.cpp                |   2 +
 src/USER-OMP/pair_born_coul_long_omp.cpp      |   2 +
 src/USER-OMP/pair_born_coul_msm_omp.cpp       |   2 +
 src/USER-OMP/pair_born_coul_wolf_omp.cpp      |   2 +
 src/USER-OMP/pair_born_omp.cpp                |   2 +
 src/USER-OMP/pair_brownian_omp.cpp            |   2 +
 src/USER-OMP/pair_brownian_poly_omp.cpp       |   2 +
 src/USER-OMP/pair_buck_coul_cut_omp.cpp       |   2 +
 src/USER-OMP/pair_buck_coul_long_omp.cpp      |   2 +
 src/USER-OMP/pair_buck_coul_msm_omp.cpp       |   2 +
 src/USER-OMP/pair_buck_long_coul_long_omp.cpp |  12 +-
 src/USER-OMP/pair_buck_omp.cpp                |   2 +
 src/USER-OMP/pair_cdeam_omp.cpp               |  24 +-
 src/USER-OMP/pair_colloid_omp.cpp             |   2 +
 src/USER-OMP/pair_comb_omp.cpp                |   2 +
 src/USER-OMP/pair_coul_cut_omp.cpp            |   2 +
 src/USER-OMP/pair_coul_cut_soft_omp.cpp       |   2 +
 src/USER-OMP/pair_coul_debye_omp.cpp          |   2 +
 src/USER-OMP/pair_coul_diel_omp.cpp           |   2 +
 src/USER-OMP/pair_coul_dsf_omp.cpp            |   2 +
 src/USER-OMP/pair_coul_long_omp.cpp           |   2 +
 src/USER-OMP/pair_coul_long_soft_omp.cpp      |   2 +
 src/USER-OMP/pair_coul_msm_omp.cpp            |   2 +
 src/USER-OMP/pair_coul_wolf_omp.cpp           |   2 +
 src/USER-OMP/pair_dpd_omp.cpp                 |   2 +
 src/USER-OMP/pair_dpd_tstat_omp.cpp           |   2 +
 src/USER-OMP/pair_eam_omp.cpp                 |   4 +
 src/USER-OMP/pair_edip_omp.cpp                |   2 +
 src/USER-OMP/pair_eim_omp.cpp                 |   6 +
 src/USER-OMP/pair_gauss_cut_omp.cpp           |   2 +
 src/USER-OMP/pair_gauss_omp.cpp               |   2 +
 src/USER-OMP/pair_gayberne_omp.cpp            |   2 +
 src/USER-OMP/pair_gran_hertz_history_omp.cpp  |   2 +
 src/USER-OMP/pair_gran_hooke_history_omp.cpp  |   4 +-
 src/USER-OMP/pair_gran_hooke_omp.cpp          |   4 +-
 src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp   |   2 +
 .../pair_hbond_dreiding_morse_omp.cpp         |   2 +
 src/USER-OMP/pair_line_lj_omp.cpp             |   2 +
 src/USER-OMP/pair_lj96_cut_omp.cpp            |   2 +
 ...air_lj_charmm_coul_charmm_implicit_omp.cpp |   2 +
 .../pair_lj_charmm_coul_charmm_omp.cpp        |   2 +
 src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp |   2 +
 .../pair_lj_charmm_coul_long_soft_omp.cpp     |   3 +-
 src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp  |   8 +-
 src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp  |   2 +
 src/USER-OMP/pair_lj_class2_coul_long_omp.cpp |   2 +
 src/USER-OMP/pair_lj_class2_omp.cpp           |   2 +
 src/USER-OMP/pair_lj_cubic_omp.cpp            |   2 +
 src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp     |   2 +
 .../pair_lj_cut_coul_cut_soft_omp.cpp         |   2 +
 src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp   |   2 +
 src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp     |   3 +-
 src/USER-OMP/pair_lj_cut_coul_long_omp.cpp    |   2 +
 .../pair_lj_cut_coul_long_soft_omp.cpp        |   2 +
 src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp     |   2 +
 src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp   |   2 +
 src/USER-OMP/pair_lj_cut_omp.cpp              |   3 +-
 src/USER-OMP/pair_lj_cut_soft_omp.cpp         |   2 +
 src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp    |   4 +-
 src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp   |   4 +-
 .../pair_lj_cut_tip4p_long_soft_omp.cpp       |   2 +
 src/USER-OMP/pair_lj_expand_omp.cpp           |   2 +
 .../pair_lj_gromacs_coul_gromacs_omp.cpp      |   2 +
 src/USER-OMP/pair_lj_gromacs_omp.cpp          |   2 +
 src/USER-OMP/pair_lj_long_coul_long_omp.cpp   |  42 +-
 src/USER-OMP/pair_lj_long_coul_long_omp.h     |   2 +-
 src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp  |  12 +-
 src/USER-OMP/pair_lj_long_tip4p_long_omp.h    |   2 +-
 src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp    |   2 +
 src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp     |   2 +
 src/USER-OMP/pair_lj_sdk_omp.cpp              |   2 +
 src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp     |   2 +
 src/USER-OMP/pair_lj_sf_omp.cpp               |   2 +
 src/USER-OMP/pair_lj_smooth_linear_omp.cpp    |   2 +
 src/USER-OMP/pair_lj_smooth_omp.cpp           |   2 +
 src/USER-OMP/pair_lubricate_omp.cpp           |   2 +
 src/USER-OMP/pair_lubricate_poly_omp.cpp      |   2 +
 src/USER-OMP/pair_meam_spline_omp.cpp         |   3 +
 src/USER-OMP/pair_morse_omp.cpp               |   2 +
 src/USER-OMP/pair_nb3b_harmonic_omp.cpp       |   2 +
 src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp     |   2 +
 src/USER-OMP/pair_nm_cut_coul_long_omp.cpp    |   2 +
 src/USER-OMP/pair_nm_cut_omp.cpp              |   2 +
 src/USER-OMP/pair_peri_lps_omp.cpp            |   2 +
 src/USER-OMP/pair_peri_pmb_omp.cpp            |   2 +
 src/USER-OMP/pair_resquared_omp.cpp           |   2 +
 src/USER-OMP/pair_soft_omp.cpp                |   2 +
 src/USER-OMP/pair_sw_omp.cpp                  |   2 +
 src/USER-OMP/pair_table_omp.cpp               |   2 +
 src/USER-OMP/pair_tersoff_mod_omp.cpp         |   2 +
 src/USER-OMP/pair_tersoff_omp.cpp             |   2 +
 src/USER-OMP/pair_tersoff_table_omp.cpp       |   2 +
 src/USER-OMP/pair_tip4p_cut_omp.cpp           |   4 +-
 src/USER-OMP/pair_tip4p_long_omp.cpp          |   4 +-
 src/USER-OMP/pair_tip4p_long_soft_omp.cpp     |   2 +
 src/USER-OMP/pair_tri_lj_omp.cpp              |   2 +
 src/USER-OMP/pair_yukawa_colloid_omp.cpp      |   2 +
 src/USER-OMP/pair_yukawa_omp.cpp              |   2 +
 src/USER-OMP/pair_zbl_omp.cpp                 |   2 +
 src/USER-OMP/pppm_cg_omp.cpp                  |  19 +-
 src/USER-OMP/pppm_disp_omp.cpp                |  91 ++--
 src/USER-OMP/pppm_disp_omp.h                  |   2 +-
 src/USER-OMP/pppm_omp.cpp                     |  19 +-
 src/USER-OMP/pppm_tip4p_omp.cpp               |  17 +-
 src/USER-OMP/respa_omp.cpp                    | 135 ++++--
 src/USER-OMP/thr_data.cpp                     |  56 ++-
 src/USER-OMP/thr_data.h                       |  15 +-
 src/USER-OMP/thr_omp.cpp                      |   4 +-
 src/finish.cpp                                | 445 ++++++++++++------
 src/input.cpp                                 |  10 +
 src/input.h                                   |   5 +-
 src/min.cpp                                   |  32 +-
 src/min_cg.cpp                                |   2 +-
 src/min_fire.cpp                              |   2 +-
 src/min_hftn.cpp                              |   2 +-
 src/min_quickmin.cpp                          |   2 +-
 src/min_sd.cpp                                |   2 +-
 src/minimize.cpp                              |   4 +-
 src/neighbor.cpp                              |   6 +-
 src/rerun.cpp                                 |   4 +-
 src/respa.cpp                                 |  62 ++-
 src/run.cpp                                   |   8 +-
 src/thermo.cpp                                |  14 +-
 src/timer.cpp                                 | 178 ++++++-
 src/timer.h                                   |  54 ++-
 src/verlet.cpp                                |  37 +-
 189 files changed, 1515 insertions(+), 576 deletions(-)

diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp
index 6135a1f754..1dea4dc467 100644
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -295,7 +295,7 @@ void FixGPU::post_force(int vflag)
   force->pair->virial[5] += lvirial[5];
 
   if (force->pair->vflag_fdotr) force->pair->virial_fdotr_compute();
-  timer->stamp(TIME_PAIR);
+  timer->stamp(Timer::PAIR);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 1e7475dc49..b63ca98ba4 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -107,7 +107,7 @@ void VerletKokkos::setup()
     atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
     atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
     force->pair->compute(eflag,vflag);
-    timer->stamp(TIME_PAIR);
+    timer->stamp(Timer::PAIR);
   }
   else if (force->pair) force->pair->compute_dummy(eflag,vflag);
 
@@ -133,7 +133,7 @@ void VerletKokkos::setup()
       atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
       force->improper->compute(eflag,vflag);
     }
-    timer->stamp(TIME_BOND);
+    timer->stamp(Timer::BOND);
   }
 
   if(force->kspace) {
@@ -142,7 +142,7 @@ void VerletKokkos::setup()
       atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
       atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     } else force->kspace->compute_dummy(eflag,vflag);
   }
 
@@ -209,7 +209,7 @@ void VerletKokkos::setup_minimal(int flag)
     atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
     atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
     force->pair->compute(eflag,vflag);
-    timer->stamp(TIME_PAIR);
+    timer->stamp(Timer::PAIR);
   }
   else if (force->pair) force->pair->compute_dummy(eflag,vflag);
 
@@ -235,7 +235,7 @@ void VerletKokkos::setup_minimal(int flag)
       atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
       force->improper->compute(eflag,vflag);
     }
-    timer->stamp(TIME_BOND);
+    timer->stamp(Timer::BOND);
   }
 
   if(force->kspace) {
@@ -244,7 +244,7 @@ void VerletKokkos::setup_minimal(int flag)
       atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
       atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     } else force->kspace->compute_dummy(eflag,vflag);
   }
 
@@ -286,9 +286,11 @@ void VerletKokkos::run(int n)
     // initial time integration
 
     ktimer.reset();
+    timer->stamp();
     modify->initial_integrate(vflag);
     time += ktimer.seconds();
     if (n_post_integrate) modify->post_integrate();
+    timer->stamp(Timer::MODIFY);
 
     // regular communication vs neighbor list rebuild
 
@@ -297,13 +299,17 @@ void VerletKokkos::run(int n)
     if (nflag == 0) {
       timer->stamp();
       comm->forward_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     } else {
       // added debug
       //atomKK->sync(Host,ALL_MASK);
       //atomKK->modified(Host,ALL_MASK);
 
-      if (n_pre_exchange) modify->pre_exchange();
+      if (n_pre_exchange) {
+        timer->stamp();
+        modify->pre_exchange();
+        timer->stamp(Timer::MODIFY);
+      }
       // debug
       //atomKK->sync(Host,ALL_MASK);
       //atomKK->modified(Host,ALL_MASK);
@@ -330,10 +336,13 @@ void VerletKokkos::run(int n)
 
       if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
 
-      timer->stamp(TIME_COMM);
-      if (n_pre_neighbor) modify->pre_neighbor();
+      timer->stamp(Timer::COMM);
+      if (n_pre_neighbor) {
+        modify->pre_neighbor();
+        timer->stamp(Timer::MODIFY);
+      }
       neighbor->build();
-      timer->stamp(TIME_NEIGHBOR);
+      timer->stamp(Timer::NEIGH);
     }
 
     // force computations
@@ -342,19 +351,24 @@ void VerletKokkos::run(int n)
     // and Pair:ev_tally() needs to be called before any tallying
 
     force_clear();
+
+    timer->stamp();
+
     // added for debug
     //atomKK->k_x.sync<LMPHostType>();
     //atomKK->k_f.sync<LMPHostType>();
     //atomKK->k_f.modify<LMPHostType>();
-    if (n_pre_force) modify->pre_force(vflag);
+    if (n_pre_force) {
+      modify->pre_force(vflag);
+      timer->stamp(Timer::MODIFY);
+    }
 
-    timer->stamp();
 
     if (pair_compute_flag) {
       atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
       atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
 
     if (atomKK->molecular) {
@@ -378,20 +392,20 @@ void VerletKokkos::run(int n)
         atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
         force->improper->compute(eflag,vflag);
       }
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
 
     if (kspace_compute_flag) {
       atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
       atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     // reverse communication of forces
 
     if (force->newton) comm->reverse_comm();
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
 
     // force modifications, final time integration, diagnostics
 
@@ -400,6 +414,7 @@ void VerletKokkos::run(int n)
     if (n_post_force) modify->post_force(vflag);
     modify->final_integrate();
     if (n_end_of_step) modify->end_of_step();
+    timer->stamp(Timer::MODIFY);
 
     time += ktimer.seconds();
 
@@ -410,7 +425,7 @@ void VerletKokkos::run(int n)
 
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 }
diff --git a/src/KSPACE/fix_tune_kspace.cpp b/src/KSPACE/fix_tune_kspace.cpp
index e3d5a5b5cd..bed94d0f04 100644
--- a/src/KSPACE/fix_tune_kspace.cpp
+++ b/src/KSPACE/fix_tune_kspace.cpp
@@ -159,7 +159,7 @@ void FixTuneKspace::pre_exchange()
     adjust_rcut(time);
   }
 
-  last_spcpu = timer->elapsed(TIME_LOOP);
+  last_spcpu = timer->elapsed(Timer::TOTAL);
 }
 
 /* ----------------------------------------------------------------------
@@ -177,7 +177,7 @@ double FixTuneKspace::get_timing_info()
     dvalue = 0.0;
     firststep = 1;
   } else {
-    new_cpu = timer->elapsed(TIME_LOOP);
+    new_cpu = timer->elapsed(Timer::TOTAL);
     double cpu_diff = new_cpu - last_spcpu;
     int step_diff = new_step - last_step;
     if (step_diff > 0.0) dvalue = cpu_diff/step_diff;
diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp
index 4c34013bfa..8cb0be7462 100644
--- a/src/REPLICA/neb.cpp
+++ b/src/REPLICA/neb.cpp
@@ -232,7 +232,7 @@ void NEB::run()
   // damped dynamic min styles insure all replicas converge together
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   while (update->minimize->niter < n1steps) {
     update->minimize->run(nevery);
@@ -240,7 +240,7 @@ void NEB::run()
     if (update->minimize->stop_condition) break;
   }
 
-  timer->barrier_stop(TIME_LOOP);
+  timer->barrier_stop();
 
   update->minimize->cleanup();
 
@@ -302,7 +302,7 @@ void NEB::run()
   // damped dynamic min styles insure all replicas converge together
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   while (update->minimize->niter < n2steps) {
     update->minimize->run(nevery);
@@ -310,7 +310,7 @@ void NEB::run()
     if (update->minimize->stop_condition) break;
   }
 
-  timer->barrier_stop(TIME_LOOP);
+  timer->barrier_stop();
 
   update->minimize->cleanup();
 
diff --git a/src/REPLICA/prd.cpp b/src/REPLICA/prd.cpp
index a99c201480..de3447db98 100644
--- a/src/REPLICA/prd.cpp
+++ b/src/REPLICA/prd.cpp
@@ -278,8 +278,8 @@ void PRD::command(int narg, char **arg)
   share_event(0,0,0);
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
-  time_start = timer->array[TIME_LOOP];
+  timer->barrier_start();
+  time_start = timer->get_wall(Timer::TOTAL);
 
   log_event();
 
@@ -305,8 +305,8 @@ void PRD::command(int narg, char **arg)
   time_dephase = time_dynamics = time_quench = time_comm = time_output = 0.0;
   bigint clock = 0;
 
-  timer->barrier_start(TIME_LOOP);
-  time_start = timer->array[TIME_LOOP];
+  timer->barrier_start();
+  time_start = timer->get_wall(Timer::TOTAL);
 
   int istep = 0;
 
@@ -386,7 +386,7 @@ void PRD::command(int narg, char **arg)
     lmp->init();
     update->integrate->setup();
 
-    timer->barrier_start(TIME_LOOP);
+    timer->barrier_start();
 
     if (t_corr > 0) replicate(ireplica);
     if (temp_flag == 0) {
@@ -396,16 +396,16 @@ void PRD::command(int narg, char **arg)
                       universe->uworld);
     }
 
-    timer->barrier_stop(TIME_LOOP);
-    time_comm += timer->array[TIME_LOOP];
+    timer->barrier_stop();
+    time_comm += timer->get_wall(Timer::TOTAL);
 
     // write restart file of hot coords
 
     if (restart_flag) {
-      timer->barrier_start(TIME_LOOP);
+      timer->barrier_start();
       output->write_restart(update->ntimestep);
-      timer->barrier_stop(TIME_LOOP);
-      time_output += timer->array[TIME_LOOP];
+      timer->barrier_stop();
+      time_output += timer->get_wall(Timer::TOTAL);
     }
 
     if (stepmode == 0) istep = update->ntimestep - update->beginstep;
@@ -416,14 +416,14 @@ void PRD::command(int narg, char **arg)
 
   // set total timers and counters so Finish() will process them
 
-  timer->array[TIME_LOOP] = time_start;
-  timer->barrier_stop(TIME_LOOP);
+  timer->set_wall(Timer::TOTAL, time_start);
+  timer->barrier_stop();
 
-  timer->array[TIME_PAIR] = time_dephase;
-  timer->array[TIME_BOND] = time_dynamics;
-  timer->array[TIME_KSPACE] = time_quench;
-  timer->array[TIME_COMM] = time_comm;
-  timer->array[TIME_OUTPUT] = time_output;
+  timer->set_wall(Timer::DEPHASE, time_dephase);
+  timer->set_wall(Timer::DYNAMICS, time_dynamics);
+  timer->set_wall(Timer::QUENCH, time_quench);
+  timer->set_wall(Timer::REPCOMM, time_comm);
+  timer->set_wall(Timer::REPOUT, time_output);
 
   neighbor->ncalls = nbuild;
   neighbor->ndanger = ndanger;
@@ -431,14 +431,14 @@ void PRD::command(int narg, char **arg)
   if (me_universe == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,
-              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
+              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
-              timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
     if (universe->ulogfile)
       fprintf(universe->ulogfile,
-              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
+              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
-              timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
   }
 
   if (me == 0) {
@@ -541,10 +541,10 @@ void PRD::dynamics(int nsteps, double &time_category)
   //modify->addstep_compute_all(update->ntimestep);
   bigint ncalls = neighbor->ncalls;
 
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   update->integrate->run(nsteps);
-  timer->barrier_stop(TIME_LOOP);
-  time_category += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_category += timer->get_wall(Timer::TOTAL);
 
   nbuild += neighbor->ncalls - ncalls;
   ndanger += neighbor->ndanger;
@@ -583,10 +583,10 @@ void PRD::quench()
 
   int ncalls = neighbor->ncalls;
 
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   update->minimize->run(maxiter);
-  timer->barrier_stop(TIME_LOOP);
-  time_quench += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_quench += timer->get_wall(Timer::TOTAL);
 
   if (neighbor->ncalls == ncalls) quench_reneighbor = 0;
   else quench_reneighbor = 1;
@@ -619,7 +619,7 @@ int PRD::check_event(int replica_num)
   if (compute_event->compute_scalar() > 0.0) worldflag = 1;
   if (replica_num >= 0 && replica_num != universe->iworld) worldflag = 0;
 
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   if (me == 0) MPI_Allreduce(&worldflag,&universeflag,1,
                              MPI_INT,MPI_SUM,comm_replica);
@@ -655,8 +655,8 @@ int PRD::check_event(int replica_num)
     MPI_Bcast(&ireplica,1,MPI_INT,0,world);
   }
 
-  timer->barrier_stop(TIME_LOOP);
-  time_comm += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_comm += timer->get_wall(Timer::TOTAL);
 
   return ireplica;
 }
@@ -672,14 +672,14 @@ int PRD::check_event(int replica_num)
 
 void PRD::share_event(int ireplica, int flag, int decrement)
 {
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   // communicate quenched coords to all replicas and store as event
   // decrement event counter if flag = 0 since not really an event
 
   replicate(ireplica);
-  timer->barrier_stop(TIME_LOOP);
-  time_comm += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_comm += timer->get_wall(Timer::TOTAL);
 
   // adjust time for last correlated event check (not on first event)
 
@@ -717,21 +717,21 @@ void PRD::share_event(int ireplica, int flag, int decrement)
   // addstep_compute_all insures eng/virial are calculated if needed
 
   if (output->ndump && universe->iworld == 0) {
-    timer->barrier_start(TIME_LOOP);
+    timer->barrier_start();
     modify->addstep_compute_all(update->ntimestep);
     update->integrate->setup_minimal(1);
     output->write_dump(update->ntimestep);
-    timer->barrier_stop(TIME_LOOP);
-    time_output += timer->array[TIME_LOOP];
+    timer->barrier_stop();
+    time_output += timer->get_wall(Timer::TOTAL);
   }
 
   // restore and communicate hot coords to all replicas
 
   fix_event->restore_state_quench();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   replicate(ireplica);
-  timer->barrier_stop(TIME_LOOP);
-  time_comm += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_comm += timer->get_wall(Timer::TOTAL);
 }
 
 /* ----------------------------------------------------------------------
@@ -740,13 +740,13 @@ void PRD::share_event(int ireplica, int flag, int decrement)
 
 void PRD::log_event()
 {
-  timer->array[TIME_LOOP] = time_start;
+  timer->set_wall(Timer::TOTAL, time_start);
   if (universe->me == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,
               BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n",
               fix_event->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->clock,
               fix_event->event_number,fix_event->correlated_event,
               fix_event->ncoincident,
@@ -755,7 +755,7 @@ void PRD::log_event()
       fprintf(universe->ulogfile,
               BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n",
               fix_event->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->clock,
               fix_event->event_number,fix_event->correlated_event,
               fix_event->ncoincident,
diff --git a/src/REPLICA/tad.cpp b/src/REPLICA/tad.cpp
index 7c858c754f..80381a7a3b 100644
--- a/src/REPLICA/tad.cpp
+++ b/src/REPLICA/tad.cpp
@@ -255,8 +255,8 @@ void TAD::command(int narg, char **arg)
   quench();
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
-  time_start = timer->array[TIME_LOOP];
+  timer->barrier_start();
+  time_start = timer->get_wall(Timer::TOTAL);
   fix_event->store_event_tad(update->ntimestep);
   log_event(0);
   fix_event->restore_state_quench();
@@ -275,8 +275,8 @@ void TAD::command(int narg, char **arg)
   nbuild = ndanger = 0;
   time_neb = time_dynamics = time_quench = time_comm = time_output = 0.0;
 
-  timer->barrier_start(TIME_LOOP);
-  time_start = timer->array[TIME_LOOP];
+  timer->barrier_start();
+  time_start = timer->get_wall(Timer::TOTAL);
 
   int confident_flag, event_flag;
 
@@ -347,10 +347,10 @@ void TAD::command(int narg, char **arg)
     // write restart file of hot coords
 
       if (restart_flag) {
-        timer->barrier_start(TIME_LOOP);
+        timer->barrier_start();
         output->write_restart(update->ntimestep);
-        timer->barrier_stop(TIME_LOOP);
-        time_output += timer->array[TIME_LOOP];
+        timer->barrier_stop();
+        time_output += timer->get_wall(Timer::TOTAL);
       }
     }
 
@@ -381,14 +381,14 @@ void TAD::command(int narg, char **arg)
 
   // set total timers and counters so Finish() will process them
 
-  timer->array[TIME_LOOP] = time_start;
-  timer->barrier_stop(TIME_LOOP);
+  timer->set_wall(Timer::TOTAL, time_start);
+  timer->barrier_stop();
 
-  timer->array[TIME_PAIR] = time_neb;
-  timer->array[TIME_BOND] = time_dynamics;
-  timer->array[TIME_KSPACE] = time_quench;
-  timer->array[TIME_COMM] = time_comm;
-  timer->array[TIME_OUTPUT] = time_output;
+  timer->set_wall(Timer::NEB, time_neb);
+  timer->set_wall(Timer::DYNAMICS, time_dynamics);
+  timer->set_wall(Timer::QUENCH, time_quench);
+  timer->set_wall(Timer::REPCOMM, time_comm);
+  timer->set_wall(Timer::REPOUT, time_output);
 
   neighbor->ncalls = nbuild;
   neighbor->ndanger = ndanger;
@@ -396,14 +396,14 @@ void TAD::command(int narg, char **arg)
   if (me_universe == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,
-              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
+              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
-              timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms);
-    if (universe->ulogfile)
+              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
+    if (universe->ulogfile)
       fprintf(universe->ulogfile,
-              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
+              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
-              timer->array[TIME_LOOP],nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
   }
 
   if (me_universe == 0) fclose(ulogfile_neb);
@@ -451,10 +451,10 @@ void TAD::dynamics()
   //modify->addstep_compute_all(update->ntimestep);
   int ncalls = neighbor->ncalls;
 
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   update->integrate->run(t_event);
-  timer->barrier_stop(TIME_LOOP);
-  time_dynamics += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_dynamics += timer->get_wall(Timer::TOTAL);
 
   nbuild += neighbor->ncalls - ncalls;
   ndanger += neighbor->ndanger;
@@ -493,10 +493,10 @@ void TAD::quench()
 
   int ncalls = neighbor->ncalls;
 
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   update->minimize->run(maxiter);
-  timer->barrier_stop(TIME_LOOP);
-  time_quench += timer->array[TIME_LOOP];
+  timer->barrier_stop();
+  time_quench += timer->get_wall(Timer::TOTAL);
 
   if (neighbor->ncalls == ncalls) quench_reneighbor = 0;
   else quench_reneighbor = 1;
@@ -535,14 +535,14 @@ int TAD::check_event()
 
 void TAD::log_event(int ievent)
 {
-  timer->array[TIME_LOOP] = time_start;
+  timer->set_wall(Timer::TOTAL, time_start);
   if (universe->me == 0) {
     double tfrac = 0.0;
     if (universe->uscreen)
       fprintf(universe->uscreen,
               BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n",
               fix_event->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->event_number,ievent,
               "E ",
               fix_event->ebarrier,tfrac,
@@ -551,7 +551,7 @@ void TAD::log_event(int ievent)
       fprintf(universe->ulogfile,
               BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n",
               fix_event->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->event_number,ievent,
               "E ",
               fix_event->ebarrier,tfrac,
@@ -563,12 +563,12 @@ void TAD::log_event(int ievent)
   // addstep_compute_all insures eng/virial are calculated if needed
 
   if (output->ndump && universe->iworld == 0) {
-    timer->barrier_start(TIME_LOOP);
+    timer->barrier_start();
     modify->addstep_compute_all(update->ntimestep);
     update->integrate->setup_minimal(1);
     output->write_dump(update->ntimestep);
-    timer->barrier_stop(TIME_LOOP);
-    time_output += timer->array[TIME_LOOP];
+    timer->barrier_stop();
+    time_output += timer->get_wall(Timer::TOTAL);
   }
 
 }
@@ -604,10 +604,10 @@ void TAD::options(int narg, char **arg)
   while (iarg < narg) {
     if (strcmp(arg[iarg],"min") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal tad command");
-      etol = atof(arg[iarg+1]);
-      ftol = atof(arg[iarg+2]);
-      maxiter = atoi(arg[iarg+3]);
-      maxeval = atoi(arg[iarg+4]);
+      etol = force->numeric(FLERR,arg[iarg+1]);
+      ftol = force->numeric(FLERR,arg[iarg+2]);
+      maxiter = force->inumeric(FLERR,arg[iarg+3]);
+      maxeval = force->inumeric(FLERR,arg[iarg+4]);
       if (maxiter < 0 || maxeval < 0 ||
           etol < 0.0 || ftol < 0.0 )
         error->all(FLERR,"Illegal tad command");
@@ -615,11 +615,11 @@ void TAD::options(int narg, char **arg)
 
     } else if (strcmp(arg[iarg],"neb") == 0) {
       if (iarg+6 > narg) error->all(FLERR,"Illegal tad command");
-      etol_neb = atof(arg[iarg+1]);
-      ftol_neb = atof(arg[iarg+2]);
-      n1steps_neb = atoi(arg[iarg+3]);
-      n2steps_neb = atoi(arg[iarg+4]);
-      nevery_neb = atoi(arg[iarg+5]);
+      etol_neb = force->numeric(FLERR,arg[iarg+1]);
+      ftol_neb = force->numeric(FLERR,arg[iarg+2]);
+      n1steps_neb = force->inumeric(FLERR,arg[iarg+3]);
+      n2steps_neb = force->inumeric(FLERR,arg[iarg+4]);
+      nevery_neb = force->inumeric(FLERR,arg[iarg+5]);
       if (etol_neb < 0.0 || ftol_neb < 0.0 ||
           n1steps_neb < 0 || n2steps_neb < 0 ||
           nevery_neb < 0) error->all(FLERR,"Illegal tad command");
@@ -753,10 +753,10 @@ void TAD::perform_neb(int ievent)
   // had to bypass timer interface
   // because timer->array is reset inside neb->run()
 
-  //    timer->barrier_start(TIME_LOOP);
+  //    timer->barrier_start();
   //    neb->run();
-  //    timer->barrier_stop(TIME_LOOP);
-  //    time_neb += timer->array[TIME_LOOP];
+  //    timer->barrier_stop();
+  //    time_neb += timer->get_wall(Timer::TOTAL);
 
   MPI_Barrier(world);
   double time_tmp = MPI_Wtime();
@@ -977,7 +977,7 @@ void TAD::compute_tlo(int ievent)
 
   // first-replica output about each event
 
-  timer->array[TIME_LOOP] = time_start;
+  timer->set_wall(Timer::TOTAL, time_start);
   if (universe->me == 0) {
     double tfrac = 0.0;
     if (ievent > 0) tfrac = delthi/deltstop;
@@ -986,7 +986,7 @@ void TAD::compute_tlo(int ievent)
       fprintf(universe->uscreen,
               BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n",
               fix_event_list[ievent]->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->event_number,
               ievent,statstr,ebarrier,tfrac,
               fix_event->tlo,deltlo);
@@ -995,7 +995,7 @@ void TAD::compute_tlo(int ievent)
       fprintf(universe->ulogfile,
               BIGINT_FORMAT " %.3f %d %d %s %.3f %.3f %.3f %.3f\n",
               fix_event_list[ievent]->event_timestep,
-              timer->elapsed(TIME_LOOP),
+              timer->elapsed(Timer::TOTAL),
               fix_event->event_number,
               ievent,statstr,ebarrier,tfrac,
               fix_event->tlo,deltlo);
diff --git a/src/REPLICA/temper.cpp b/src/REPLICA/temper.cpp
index 8d7204345f..45e100ad52 100644
--- a/src/REPLICA/temper.cpp
+++ b/src/REPLICA/temper.cpp
@@ -204,7 +204,7 @@ void Temper::command(int narg, char **arg)
   }
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   for (int iswap = 0; iswap < nswaps; iswap++) {
 
@@ -309,7 +309,7 @@ void Temper::command(int narg, char **arg)
     if (me_universe == 0) print_status();
   }
 
-  timer->barrier_stop(TIME_LOOP);
+  timer->barrier_stop();
 
   update->integrate->cleanup();
 
diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp
index c9da4270fb..408821fe22 100644
--- a/src/REPLICA/verlet_split.cpp
+++ b/src/REPLICA/verlet_split.cpp
@@ -279,7 +279,7 @@ void VerletSplit::run(int n)
 
   MPI_Barrier(universe->uworld);
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   // setup initial Rspace <-> Kspace comm params
 
@@ -325,7 +325,7 @@ void VerletSplit::run(int n)
       if (nflag == 0) {
         timer->stamp();
         comm->forward_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       } else {
         if (n_pre_exchange) modify->pre_exchange();
         if (triclinic) domain->x2lamda(atom->nlocal);
@@ -340,10 +340,10 @@ void VerletSplit::run(int n)
         if (sortflag && ntimestep >= atom->nextsort) atom->sort();
         comm->borders();
         if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
         if (n_pre_neighbor) modify->pre_neighbor();
         neighbor->build();
-        timer->stamp(TIME_NEIGHBOR);
+        timer->stamp(Timer::NEIGH);
       }
     }
 
@@ -363,7 +363,7 @@ void VerletSplit::run(int n)
       timer->stamp();
       if (force->pair) {
         force->pair->compute(eflag,vflag);
-        timer->stamp(TIME_PAIR);
+        timer->stamp(Timer::PAIR);
       }
 
       if (atom->molecular) {
@@ -371,12 +371,12 @@ void VerletSplit::run(int n)
         if (force->angle) force->angle->compute(eflag,vflag);
         if (force->dihedral) force->dihedral->compute(eflag,vflag);
         if (force->improper) force->improper->compute(eflag,vflag);
-        timer->stamp(TIME_BOND);
+        timer->stamp(Timer::BOND);
       }
 
       if (force->newton) {
         comm->reverse_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       }
 
     } else {
@@ -388,14 +388,14 @@ void VerletSplit::run(int n)
       if (force->kspace) {
         timer->stamp();
         force->kspace->compute(eflag,vflag);
-        timer->stamp(TIME_KSPACE);
+        timer->stamp(Timer::KSPACE);
       }
 
       // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
 
       if (tip4p_flag && force->newton) {
         comm->reverse_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       }
     }
 
@@ -407,14 +407,16 @@ void VerletSplit::run(int n)
     // all output
 
     if (master) {
+      timer->stamp();
       if (n_post_force) modify->post_force(vflag);
       modify->final_integrate();
       if (n_end_of_step) modify->end_of_step();
+      timer->stamp(Timer::MODIFY);
 
       if (ntimestep == output->next) {
         timer->stamp();
         output->write(ntimestep);
-        timer->stamp(TIME_OUTPUT);
+        timer->stamp(Timer::OUTPUT);
       }
     }
   }
@@ -486,7 +488,7 @@ void VerletSplit::rk_setup()
       atom->map_clear();
       comm->borders();
       if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
   }
 }
@@ -535,7 +537,7 @@ void VerletSplit::r2k_comm()
   if (tip4p_flag && !master) {
     timer->stamp();
     comm->forward_comm();
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
   }
 }
 
diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp
index 7d4a0c4114..eba4a0b09c 100644
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@@ -679,7 +679,7 @@ void VerletCuda::run(int n)
         my_gettime(CLOCK_REALTIME, &starttime);
         timer->stamp();
         comm->forward_comm(1);
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
         my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
@@ -699,7 +699,7 @@ void VerletCuda::run(int n)
         //start force calculation asynchronus
         cuda->shared_data.comm.comm_phase = 1;
         force->pair->compute(eflag, vflag);
-        timer->stamp(TIME_PAIR);
+        timer->stamp(Timer::PAIR);
         //CudaWrapper_Sync();
 
         //download comm buffers from GPU, perform MPI communication and upload buffers again
@@ -708,11 +708,11 @@ void VerletCuda::run(int n)
         my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
 
         //wait for force calculation
         CudaWrapper_Sync();
-        timer->stamp(TIME_PAIR);
+        timer->stamp(Timer::PAIR);
 
         //unpack communication buffers
         my_gettime(CLOCK_REALTIME, &starttime);
@@ -721,7 +721,7 @@ void VerletCuda::run(int n)
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
         MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");)
         cuda->shared_data.cuda_timings.test1 +=
           endtotal.tv_sec - starttotal.tv_sec + 1.0 * (endtotal.tv_nsec - starttotal.tv_nsec) / 1000000000;
@@ -732,7 +732,7 @@ void VerletCuda::run(int n)
         my_gettime(CLOCK_REALTIME, &endtime);
         cuda->shared_data.cuda_timings.comm_forward_total +=
           endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
         MYDBG(printf("# CUDA VerletCuda::iterate: communicate done\n");)
       }
     } else {
@@ -822,7 +822,7 @@ void VerletCuda::run(int n)
       cuda->shared_data.buffer_new = 2;
 
       MYDBG(printf("# CUDA VerletCuda::iterate: neighbor build\n");)
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
       my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.test2 +=
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
@@ -830,7 +830,7 @@ void VerletCuda::run(int n)
       //rebuild neighbor list
       test_atom(testatom, "Pre Neighbor");
       neighbor->build(0);
-      timer->stamp(TIME_NEIGHBOR);
+      timer->stamp(Timer::NEIGH);
       MYDBG(printf("# CUDA VerletCuda::iterate: neighbor done\n");)
       //if bonded interactions are used (in this case collect_forces_later is true), transfer data which only changes upon exchange/border routines from GPU to CPU
       if(cuda->shared_data.pair.collect_forces_later) {
@@ -917,7 +917,7 @@ void VerletCuda::run(int n)
       if(not cuda->shared_data.pair.collect_forces_later)
         CudaWrapper_Sync();
 
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
 
     //calculate bonded interactions
@@ -927,11 +927,11 @@ void VerletCuda::run(int n)
       if(n_pre_force == 0) Verlet::force_clear();
       else  cuda->cu_f->downloadAsync(2);
 
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
 
       if(neighbor->lastcall == update->ntimestep) {
         neighbor->build_topology();
-        timer->stamp(TIME_NEIGHBOR);
+        timer->stamp(Timer::NEIGH);
       }
 
       test_atom(testatom, "pre bond force");
@@ -944,7 +944,7 @@ void VerletCuda::run(int n)
 
       if(force->improper) force->improper->compute(eflag, vflag);
 
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
 
     //collect forces in case pair force and bonded interactions were overlapped, and either no KSPACE or a GPU KSPACE style is used
@@ -969,7 +969,7 @@ void VerletCuda::run(int n)
 
       if(vflag) cuda->cu_virial->download();
 
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
 
       my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.pair_force_collection +=
@@ -987,7 +987,7 @@ void VerletCuda::run(int n)
         if(n_pre_force == 0) Verlet::force_clear();
         else  cuda->cu_f->downloadAsync(2);
 
-        timer->stamp(TIME_PAIR);
+        timer->stamp(Timer::PAIR);
       }
 
       force->kspace->compute(eflag, vflag);
@@ -995,7 +995,7 @@ void VerletCuda::run(int n)
       if((not cuda->shared_data.pppm.cudable_force) && (not cuda->shared_data.pair.collect_forces_later))
         cuda->uploadAll();
 
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     //collect forces in case pair forces and kspace was overlaped
@@ -1018,7 +1018,7 @@ void VerletCuda::run(int n)
 
       if(vflag) cuda->cu_virial->download();
 
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
 
       my_gettime(CLOCK_REALTIME, &endtime);
       cuda->shared_data.cuda_timings.pair_force_collection +=
@@ -1028,7 +1028,7 @@ void VerletCuda::run(int n)
     //send forces on ghost atoms back to other GPU: THIS SHOULD NEVER HAPPEN
     if(force->newton) {
       comm->reverse_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
 
     test_atom(testatom, "post force");
@@ -1054,7 +1054,7 @@ void VerletCuda::run(int n)
 
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
 
 
diff --git a/src/USER-FEP/compute_fep.cpp b/src/USER-FEP/compute_fep.cpp
index 06103f874b..10551a15b9 100644
--- a/src/USER-FEP/compute_fep.cpp
+++ b/src/USER-FEP/compute_fep.cpp
@@ -305,11 +305,11 @@ void ComputeFEP::compute_vector()
   timer->stamp();
   if (force->pair && force->pair->compute_flag) {
     force->pair->compute(eflag,vflag);
-    timer->stamp(TIME_PAIR);
+    timer->stamp(Timer::PAIR);
   }
   if (chgflag && force->kspace && force->kspace->compute_flag) {
     force->kspace->compute(eflag,vflag);
-    timer->stamp(TIME_KSPACE);
+    timer->stamp(Timer::KSPACE);
   }
 
   // accumulate force/energy/virial from /gpu pair styles
@@ -322,11 +322,11 @@ void ComputeFEP::compute_vector()
   timer->stamp();
   if (force->pair && force->pair->compute_flag) {
     force->pair->compute(eflag,vflag);
-    timer->stamp(TIME_PAIR);
+    timer->stamp(Timer::PAIR);
   }
   if (chgflag && force->kspace && force->kspace->compute_flag) {
     force->kspace->compute(eflag,vflag);
-    timer->stamp(TIME_KSPACE);
+    timer->stamp(Timer::KSPACE);
   }
 
   // accumulate force/energy/virial from /gpu pair styles
diff --git a/src/USER-INTEL/verlet_intel.cpp b/src/USER-INTEL/verlet_intel.cpp
index 5bfd04639c..039e3bc36e 100644
--- a/src/USER-INTEL/verlet_intel.cpp
+++ b/src/USER-INTEL/verlet_intel.cpp
@@ -272,8 +272,10 @@ void VerletIntel::run(int n)
 
     // initial time integration
 
+    timer->stamp();
     modify->initial_integrate(vflag);
     if (n_post_integrate) modify->post_integrate();
+    timer->stamp(Timer::MODIFY);
 
     // regular communication vs neighbor list rebuild
 
@@ -282,9 +284,13 @@ void VerletIntel::run(int n)
     if (nflag == 0) {
       timer->stamp();
       comm->forward_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     } else {
-      if (n_pre_exchange) modify->pre_exchange();
+      if (n_pre_exchange) {
+        timer->stamp();
+        modify->pre_exchange();
+        timer->stamp(Timer::MODIFY);
+      }
       if (triclinic) domain->x2lamda(atom->nlocal);
       domain->pbc();
       if (domain->box_change) {
@@ -297,10 +303,13 @@ void VerletIntel::run(int n)
       if (sortflag && ntimestep >= atom->nextsort) atom->sort();
       comm->borders();
       if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-      timer->stamp(TIME_COMM);
-      if (n_pre_neighbor) modify->pre_neighbor();
+      timer->stamp(Timer::COMM);
+      if (n_pre_neighbor) {
+        modify->pre_neighbor();
+        timer->stamp(Timer::MODIFY);
+      }
       neighbor->build();
-      timer->stamp(TIME_NEIGHBOR);
+      timer->stamp(Timer::NEIGH);
     }
 
     // force computations
@@ -309,13 +318,18 @@ void VerletIntel::run(int n)
     // and Pair:ev_tally() needs to be called before any tallying
 
     force_clear();
-    if (n_pre_force) modify->pre_force(vflag);
 
     timer->stamp();
 
+    if (n_pre_force) {
+      modify->pre_force(vflag);
+      timer->stamp(Timer::MODIFY);
+    }
+
+
     if (pair_compute_flag) {
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
 
     if (atom->molecular) {
@@ -323,18 +337,18 @@ void VerletIntel::run(int n)
       if (force->angle) force->angle->compute(eflag,vflag);
       if (force->dihedral) force->dihedral->compute(eflag,vflag);
       if (force->improper) force->improper->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
 
     if (kspace_compute_flag) {
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     #ifdef _LMP_INTEL_OFFLOAD
     if (sync_mode == 1) {
       fix_intel->sync_coprocessor();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     #endif
 
@@ -342,13 +356,13 @@ void VerletIntel::run(int n)
 
     if (force->newton) {
       comm->reverse_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
 
     #ifdef _LMP_INTEL_OFFLOAD
     if (sync_mode == 2) {
       fix_intel->sync_coprocessor();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     #endif
 
@@ -357,13 +371,14 @@ void VerletIntel::run(int n)
     if (n_post_force) modify->post_force(vflag);
     modify->final_integrate();
     if (n_end_of_step) modify->end_of_step();
+    timer->stamp(Timer::MODIFY);
 
     // all output
 
     if (ntimestep == output->next) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 }
diff --git a/src/USER-INTEL/verlet_split_intel.cpp b/src/USER-INTEL/verlet_split_intel.cpp
index f1b64331c4..806b3652f9 100644
--- a/src/USER-INTEL/verlet_split_intel.cpp
+++ b/src/USER-INTEL/verlet_split_intel.cpp
@@ -283,7 +283,7 @@ void VerletSplitIntel::run(int n)
 
   MPI_Barrier(universe->uworld);
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   // setup initial Rspace <-> Kspace comm params
 
@@ -329,7 +329,7 @@ void VerletSplitIntel::run(int n)
       if (nflag == 0) {
         timer->stamp();
         comm->forward_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       } else {
         if (n_pre_exchange) modify->pre_exchange();
         if (triclinic) domain->x2lamda(atom->nlocal);
@@ -344,10 +344,10 @@ void VerletSplitIntel::run(int n)
         if (sortflag && ntimestep >= atom->nextsort) atom->sort();
         comm->borders();
         if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
         if (n_pre_neighbor) modify->pre_neighbor();
         neighbor->build();
-        timer->stamp(TIME_NEIGHBOR);
+        timer->stamp(Timer::NEIGH);
       }
     }
 
@@ -367,7 +367,7 @@ void VerletSplitIntel::run(int n)
       timer->stamp();
       if (force->pair) {
         force->pair->compute(eflag,vflag);
-        timer->stamp(TIME_PAIR);
+        timer->stamp(Timer::PAIR);
       }
 
       if (atom->molecular) {
@@ -375,25 +375,25 @@ void VerletSplitIntel::run(int n)
         if (force->angle) force->angle->compute(eflag,vflag);
         if (force->dihedral) force->dihedral->compute(eflag,vflag);
         if (force->improper) force->improper->compute(eflag,vflag);
-        timer->stamp(TIME_BOND);
+        timer->stamp(Timer::BOND);
       }
 
       #ifdef _LMP_INTEL_OFFLOAD
       if (sync_mode == 1) {
 	fix_intel->sync_coprocessor();
-	timer->stamp(TIME_PAIR);
+	timer->stamp(Timer::PAIR);
       }
       #endif
 
       if (force->newton) {
         comm->reverse_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       }
 
       #ifdef _LMP_INTEL_OFFLOAD
       if (sync_mode == 2) {
 	fix_intel->sync_coprocessor();
-	timer->stamp(TIME_PAIR);
+	timer->stamp(Timer::PAIR);
       }
       #endif
 
@@ -406,14 +406,14 @@ void VerletSplitIntel::run(int n)
       if (force->kspace) {
         timer->stamp();
         force->kspace->compute(eflag,vflag);
-        timer->stamp(TIME_KSPACE);
+        timer->stamp(Timer::KSPACE);
       }
 
       // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
 
       if (tip4p_flag && force->newton) {
         comm->reverse_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       }
     }
 
@@ -425,14 +425,16 @@ void VerletSplitIntel::run(int n)
     // all output
 
     if (master) {
+      timer->stamp();
       if (n_post_force) modify->post_force(vflag);
       modify->final_integrate();
       if (n_end_of_step) modify->end_of_step();
+      timer->stamp(Timer::MODIFY);
 
       if (ntimestep == output->next) {
         timer->stamp();
         output->write(ntimestep);
-        timer->stamp(TIME_OUTPUT);
+        timer->stamp(Timer::OUTPUT);
       }
     }
   }
@@ -504,7 +506,7 @@ void VerletSplitIntel::rk_setup()
       atom->map_clear();
       comm->borders();
       if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
   }
 }
@@ -553,7 +555,7 @@ void VerletSplitIntel::r2k_comm()
   if (tip4p_flag && !master) {
     timer->stamp();
     comm->forward_comm();
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
   }
 }
 
diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp
index e346736948..228ca94dee 100644
--- a/src/USER-OMP/angle_charmm_omp.cpp
+++ b/src/USER-OMP/angle_charmm_omp.cpp
@@ -61,6 +61,7 @@ void AngleCharmmOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCharmmOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp
index bbe58ec48f..8f958b477c 100644
--- a/src/USER-OMP/angle_class2_omp.cpp
+++ b/src/USER-OMP/angle_class2_omp.cpp
@@ -61,6 +61,7 @@ void AngleClass2OMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleClass2OMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp
index a642694222..f9891dbb3d 100644
--- a/src/USER-OMP/angle_cosine_delta_omp.cpp
+++ b/src/USER-OMP/angle_cosine_delta_omp.cpp
@@ -61,6 +61,7 @@ void AngleCosineDeltaOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCosineDeltaOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp
index 8aaddc528c..39ae3ce698 100644
--- a/src/USER-OMP/angle_cosine_omp.cpp
+++ b/src/USER-OMP/angle_cosine_omp.cpp
@@ -61,6 +61,7 @@ void AngleCosineOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCosineOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp
index d63dfd8ed1..311152f06c 100644
--- a/src/USER-OMP/angle_cosine_periodic_omp.cpp
+++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp
@@ -63,6 +63,7 @@ void AngleCosinePeriodicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -79,6 +80,7 @@ void AngleCosinePeriodicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
index 5abf621050..3a3c31d625 100644
--- a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
+++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
@@ -61,6 +61,7 @@ void AngleCosineShiftExpOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCosineShiftExpOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp
index 42a7b8d734..f9f538e553 100644
--- a/src/USER-OMP/angle_cosine_shift_omp.cpp
+++ b/src/USER-OMP/angle_cosine_shift_omp.cpp
@@ -61,6 +61,7 @@ void AngleCosineShiftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCosineShiftOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp
index 20ba91d1d5..c7d14468f5 100644
--- a/src/USER-OMP/angle_cosine_squared_omp.cpp
+++ b/src/USER-OMP/angle_cosine_squared_omp.cpp
@@ -61,6 +61,7 @@ void AngleCosineSquaredOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleCosineSquaredOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp
index 4e67801671..cf391b2d60 100644
--- a/src/USER-OMP/angle_dipole_omp.cpp
+++ b/src/USER-OMP/angle_dipole_omp.cpp
@@ -65,6 +65,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -73,6 +74,7 @@ void AngleDipoleOMP::compute(int eflag, int vflag)
       else
         eval<0>(ifrom, ito, thr);
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
diff --git a/src/USER-OMP/angle_fourier_omp.cpp b/src/USER-OMP/angle_fourier_omp.cpp
index 4d22a70b69..275eab29f1 100644
--- a/src/USER-OMP/angle_fourier_omp.cpp
+++ b/src/USER-OMP/angle_fourier_omp.cpp
@@ -61,6 +61,7 @@ void AngleFourierOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleFourierOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_fourier_simple_omp.cpp b/src/USER-OMP/angle_fourier_simple_omp.cpp
index 9b27309e6c..5bdbfce05d 100644
--- a/src/USER-OMP/angle_fourier_simple_omp.cpp
+++ b/src/USER-OMP/angle_fourier_simple_omp.cpp
@@ -61,6 +61,7 @@ void AngleFourierSimpleOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleFourierSimpleOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp
index 69b20f2691..917bc2d5b0 100644
--- a/src/USER-OMP/angle_harmonic_omp.cpp
+++ b/src/USER-OMP/angle_harmonic_omp.cpp
@@ -61,6 +61,7 @@ void AngleHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_quartic_omp.cpp b/src/USER-OMP/angle_quartic_omp.cpp
index 26f783db79..70383fd50a 100644
--- a/src/USER-OMP/angle_quartic_omp.cpp
+++ b/src/USER-OMP/angle_quartic_omp.cpp
@@ -61,6 +61,7 @@ void AngleQuarticOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleQuarticOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp
index 523f0836c7..9b4a967bf3 100644
--- a/src/USER-OMP/angle_sdk_omp.cpp
+++ b/src/USER-OMP/angle_sdk_omp.cpp
@@ -63,6 +63,7 @@ void AngleSDKOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -79,6 +80,7 @@ void AngleSDKOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp
index b45956d54e..465f4370fc 100644
--- a/src/USER-OMP/angle_table_omp.cpp
+++ b/src/USER-OMP/angle_table_omp.cpp
@@ -61,6 +61,7 @@ void AngleTableOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void AngleTableOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp
index 47e684da1a..69decfb32a 100644
--- a/src/USER-OMP/bond_class2_omp.cpp
+++ b/src/USER-OMP/bond_class2_omp.cpp
@@ -57,6 +57,7 @@ void BondClass2OMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -73,6 +74,7 @@ void BondClass2OMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp
index 5a18461420..37cb8b403e 100644
--- a/src/USER-OMP/bond_fene_expand_omp.cpp
+++ b/src/USER-OMP/bond_fene_expand_omp.cpp
@@ -58,6 +58,7 @@ void BondFENEExpandOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -74,6 +75,7 @@ void BondFENEExpandOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp
index bd7ed4a593..e92dae999c 100644
--- a/src/USER-OMP/bond_fene_omp.cpp
+++ b/src/USER-OMP/bond_fene_omp.cpp
@@ -58,6 +58,7 @@ void BondFENEOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -74,6 +75,7 @@ void BondFENEOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp
index 7be27fbef5..b62fd53193 100644
--- a/src/USER-OMP/bond_harmonic_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_omp.cpp
@@ -56,6 +56,7 @@ void BondHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
index 1999912fae..db2518e9a9 100644
--- a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
@@ -56,6 +56,7 @@ void BondHarmonicShiftCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondHarmonicShiftCutOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp
index 43498327c8..632db87301 100644
--- a/src/USER-OMP/bond_harmonic_shift_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp
@@ -56,6 +56,7 @@ void BondHarmonicShiftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondHarmonicShiftOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp
index 2cae149e41..d03783920b 100644
--- a/src/USER-OMP/bond_morse_omp.cpp
+++ b/src/USER-OMP/bond_morse_omp.cpp
@@ -56,6 +56,7 @@ void BondMorseOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondMorseOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp
index 57d547bb04..2bc77de85c 100644
--- a/src/USER-OMP/bond_nonlinear_omp.cpp
+++ b/src/USER-OMP/bond_nonlinear_omp.cpp
@@ -56,6 +56,7 @@ void BondNonlinearOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondNonlinearOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp
index 28c16a7458..c0ddfd0b92 100644
--- a/src/USER-OMP/bond_quartic_omp.cpp
+++ b/src/USER-OMP/bond_quartic_omp.cpp
@@ -62,6 +62,7 @@ void BondQuarticOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -78,6 +79,7 @@ void BondQuarticOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp
index 3803eaa02b..53226df4f7 100644
--- a/src/USER-OMP/bond_table_omp.cpp
+++ b/src/USER-OMP/bond_table_omp.cpp
@@ -56,6 +56,7 @@ void BondTableOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -72,6 +73,7 @@ void BondTableOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp
index 50ddf5cbbc..311ef73e3b 100644
--- a/src/USER-OMP/dihedral_charmm_omp.cpp
+++ b/src/USER-OMP/dihedral_charmm_omp.cpp
@@ -67,6 +67,7 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -83,6 +84,7 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp
index 701f8720cc..b4d1080cea 100644
--- a/src/USER-OMP/dihedral_class2_omp.cpp
+++ b/src/USER-OMP/dihedral_class2_omp.cpp
@@ -60,6 +60,7 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
index d17900d1e8..57a4561629 100644
--- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
@@ -60,6 +60,7 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_fourier_omp.cpp b/src/USER-OMP/dihedral_fourier_omp.cpp
index f63f45c9a8..29d18c9fca 100644
--- a/src/USER-OMP/dihedral_fourier_omp.cpp
+++ b/src/USER-OMP/dihedral_fourier_omp.cpp
@@ -61,6 +61,7 @@ void DihedralFourierOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void DihedralFourierOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp
index 68134d3c7a..79daf186cb 100644
--- a/src/USER-OMP/dihedral_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_harmonic_omp.cpp
@@ -60,6 +60,7 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp
index fdffeecdce..316fbfcb3f 100644
--- a/src/USER-OMP/dihedral_helix_omp.cpp
+++ b/src/USER-OMP/dihedral_helix_omp.cpp
@@ -63,6 +63,7 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -79,6 +80,7 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
index 4eabdddb85..e43c75d539 100644
--- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
@@ -60,6 +60,7 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_nharmonic_omp.cpp b/src/USER-OMP/dihedral_nharmonic_omp.cpp
index 51a3f1b752..3cf0630736 100644
--- a/src/USER-OMP/dihedral_nharmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_nharmonic_omp.cpp
@@ -60,6 +60,7 @@ void DihedralNHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void DihedralNHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp
index 41c9691dae..187bdae3a6 100644
--- a/src/USER-OMP/dihedral_opls_omp.cpp
+++ b/src/USER-OMP/dihedral_opls_omp.cpp
@@ -61,6 +61,7 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_quadratic_omp.cpp b/src/USER-OMP/dihedral_quadratic_omp.cpp
index 2fc72e7317..3157e3b40a 100644
--- a/src/USER-OMP/dihedral_quadratic_omp.cpp
+++ b/src/USER-OMP/dihedral_quadratic_omp.cpp
@@ -61,6 +61,7 @@ void DihedralQuadraticOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,7 @@ void DihedralQuadraticOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/dihedral_table_omp.cpp b/src/USER-OMP/dihedral_table_omp.cpp
index fdc05e7ba0..1457f7b2bf 100644
--- a/src/USER-OMP/dihedral_table_omp.cpp
+++ b/src/USER-OMP/dihedral_table_omp.cpp
@@ -128,6 +128,7 @@ void DihedralTableOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -144,6 +145,7 @@ void DihedralTableOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp
index ea74c8c748..6625a1dcb8 100644
--- a/src/USER-OMP/ewald_omp.cpp
+++ b/src/USER-OMP/ewald_omp.cpp
@@ -118,6 +118,7 @@ void EwaldOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr);
 
     for (i = ifrom; i < ito; i++) {
@@ -205,6 +206,7 @@ void EwaldOMP::compute(int eflag, int vflag)
           for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;
     }
 
+    thr->timer(Timer::KSPACE);
     reduce_thr(this, eflag,vflag,thr);
   } // end of omp parallel region
 
diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp
index 6717d04179..4a669addea 100644
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@@ -26,6 +26,7 @@
 #include "update.h"
 #include "integrate.h"
 #include "min.h"
+#include "timer.h"
 
 #include "fix_omp.h"
 #include "thr_data.h"
@@ -65,7 +66,7 @@ static int get_tid()
 
 /* ---------------------------------------------------------------------- */
 
-FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) 
+FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
   :  Fix(lmp, narg, arg),
      thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL),
      _nthr(-1), _neighbor(true), _mixed(false), _reduced(true)
@@ -130,7 +131,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
       fprintf(screen,"using %s neighbor list subroutines\n", nmode);
       fprintf(screen,"prefer %s precision OpenMP force kernels\n", kmode);
     }
-    
+
     if (logfile) {
       if (reset_thr)
 	fprintf(logfile,"set %d OpenMP thread(s) per MPI task\n", nthreads);
@@ -146,11 +147,12 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
   thr = new ThrData *[nthreads];
   _nthr = nthreads;
 #if defined(_OPENMP)
-#pragma omp parallel default(none)
+#pragma omp parallel default(none) shared(lmp)
 #endif
   {
     const int tid = get_tid();
-    thr[tid] = new ThrData(tid);
+    Timer *t = new Timer(lmp);
+    thr[tid] = new ThrData(tid,t);
   }
 }
 
@@ -196,11 +198,17 @@ int FixOMP::setmask()
 void FixOMP::init()
 {
   // USER-OMP package cannot be used with atom_style template
-  
-  if (atom->molecular == 2) 
+  if (atom->molecular == 2)
     error->all(FLERR,"USER-OMP package does not (yet) work with "
                "atom_style template");
 
+  // reset per thread timer
+  for (int i=0; i < comm->nthreads; ++i) {
+    thr[i]->_timer_active=1;
+    thr[i]->timer(Timer::RESET);
+    thr[i]->_timer_active=-1;
+  }
+
   if ((strstr(update->integrate_style,"respa") != NULL)
       && (strstr(update->integrate_style,"respa/omp") == NULL))
     error->all(FLERR,"Need to use respa/omp for r-RESPA with /omp styles");
@@ -272,7 +280,7 @@ void FixOMP::init()
     CheckStyleForOMP(improper);
     CheckHybridForOMP(improper,Improper);
   }
-  
+
   if (kspace_split >= 0) {
     CheckStyleForOMP(kspace);
   }
@@ -325,6 +333,15 @@ void FixOMP::set_neighbor_omp()
 
 /* ---------------------------------------------------------------------- */
 
+void FixOMP::setup(int)
+{
+  // the force computation done during setup is behind us at this point: turn on the per-thread timers
+  for (int i=0; i < comm->nthreads; ++i)
+    thr[i]->_timer_active=0;
+}
+
+/* ---------------------------------------------------------------------- */
+
 // adjust size and clear out per thread accumulator arrays
 void FixOMP::pre_force(int)
 {
diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h
index 656ab752a8..55e042dd52 100644
--- a/src/USER-OMP/fix_omp.h
+++ b/src/USER-OMP/fix_omp.h
@@ -36,6 +36,8 @@ class FixOMP : public Fix {
   virtual ~FixOMP();
   virtual int setmask();
   virtual void init();
+  virtual void setup(int);
+  virtual void min_setup(int flag) { setup(flag); }
   virtual void pre_force(int);
 
   virtual void setup_pre_force(int vflag)           { pre_force(vflag); }
diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp
index d9e90218c0..2cf71f675c 100644
--- a/src/USER-OMP/improper_class2_omp.cpp
+++ b/src/USER-OMP/improper_class2_omp.cpp
@@ -60,6 +60,7 @@ void ImproperClass2OMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperClass2OMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/improper_cossq_omp.cpp b/src/USER-OMP/improper_cossq_omp.cpp
index bf3dd678f6..745591a338 100644
--- a/src/USER-OMP/improper_cossq_omp.cpp
+++ b/src/USER-OMP/improper_cossq_omp.cpp
@@ -60,6 +60,7 @@ void ImproperCossqOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperCossqOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp
index a082a0d7bc..1d252fee5b 100644
--- a/src/USER-OMP/improper_cvff_omp.cpp
+++ b/src/USER-OMP/improper_cvff_omp.cpp
@@ -60,6 +60,7 @@ void ImproperCvffOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperCvffOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp
index 037eff48c9..49fcef23d4 100644
--- a/src/USER-OMP/improper_fourier_omp.cpp
+++ b/src/USER-OMP/improper_fourier_omp.cpp
@@ -60,6 +60,7 @@ void ImproperFourierOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperFourierOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -115,18 +117,18 @@ void ImproperFourierOMP::eval(int nfrom, int nto, ThrData * const thr)
     vb3z = x[i4][2] - x[i1][2];
 
     add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i2,i3,i4,type,
-				       vb1x,vb1y,vb1z, 
-				       vb2x,vb2y,vb2z, 
+				       vb1x,vb1y,vb1z,
+				       vb2x,vb2y,vb2z,
 				       vb3x,vb3y,vb3z,thr);
     if ( all[type] ) {
       add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i4,i2,i3,type,
 					 vb3x,vb3y,vb3z,
-					 vb1x,vb1y,vb1z, 
-					 vb2x,vb2y,vb2z,thr); 
+					 vb1x,vb1y,vb1z,
+					 vb2x,vb2y,vb2z,thr);
       add1_thr<EVFLAG,EFLAG,NEWTON_BOND>(i1,i3,i4,i2,type,
-					 vb2x,vb2y,vb2z, 
+					 vb2x,vb2y,vb2z,
 					 vb3x,vb3y,vb3z,
-					 vb1x,vb1y,vb1z,thr); 
+					 vb1x,vb1y,vb1z,thr);
     }
   }
 }
diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp
index 3104322ca8..e323b75e3f 100644
--- a/src/USER-OMP/improper_harmonic_omp.cpp
+++ b/src/USER-OMP/improper_harmonic_omp.cpp
@@ -60,6 +60,7 @@ void ImproperHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperHarmonicOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp
index 1394c8986d..c1737a4f21 100644
--- a/src/USER-OMP/improper_ring_omp.cpp
+++ b/src/USER-OMP/improper_ring_omp.cpp
@@ -62,6 +62,7 @@ void ImproperRingOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -77,6 +78,8 @@ void ImproperRingOMP::compute(int eflag, int vflag)
         if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
         else eval<0,0,0>(ifrom, ito, thr);
       }
-      reduce_thr(this, eflag, vflag, thr);
     }
+    // stamp the timer and reduce even when inum == 0, like the other bonded /omp styles
+    thr->timer(Timer::BOND);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp
index e0cdeaf3fc..689dbdfe65 100644
--- a/src/USER-OMP/improper_umbrella_omp.cpp
+++ b/src/USER-OMP/improper_umbrella_omp.cpp
@@ -60,6 +60,7 @@ void ImproperUmbrellaOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (inum > 0) {
@@ -76,6 +77,7 @@ void ImproperUmbrellaOMP::compute(int eflag, int vflag)
         else eval<0,0,0>(ifrom, ito, thr);
       }
     }
+    thr->timer(Timer::BOND);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp
index 0af044e633..26926ab775 100644
--- a/src/USER-OMP/msm_cg_omp.cpp
+++ b/src/USER-OMP/msm_cg_omp.cpp
@@ -312,6 +312,7 @@ void MSMCGOMP::compute(int eflag, int vflag)
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/msm_omp.cpp b/src/USER-OMP/msm_omp.cpp
index a29fa43f52..7a79dc865b 100755
--- a/src/USER-OMP/msm_omp.cpp
+++ b/src/USER-OMP/msm_omp.cpp
@@ -66,6 +66,7 @@ void MSMOMP::compute(int eflag, int vflag)
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -74,12 +75,12 @@ void MSMOMP::compute(int eflag, int vflag)
    MSM direct part procedure for intermediate grid levels
 ------------------------------------------------------------------------- */
 
-void MSMOMP::direct(int n) 
+void MSMOMP::direct(int n)
 {
   // zero out electric potential
 
   memset(&(egrid[n][nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
-  
+
   // zero out virial
 
   if (vflag_atom) {
@@ -169,6 +170,8 @@ void MSMOMP::direct_eval(const int nn)
     int i,ifrom,ito,tid,icx,icy,icz,ix,iy,iz,k;
 
     loop_setup_thr(ifrom, ito, tid, inum, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (i = ifrom; i < ito; ++i) {
 
@@ -298,6 +301,7 @@ void MSMOMP::direct_eval(const int nn)
         }
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of omp parallel region
 
   if (EFLAG_GLOBAL || VFLAG_GLOBAL) {
diff --git a/src/USER-OMP/neighbor_omp.h b/src/USER-OMP/neighbor_omp.h
index 2b2ad24fe5..53726109e8 100644
--- a/src/USER-OMP/neighbor_omp.h
+++ b/src/USER-OMP/neighbor_omp.h
@@ -18,14 +18,20 @@
 #include <omp.h>
 #endif
 
+#include "modify.h"
+#include "timer.h"
+#include "fix_omp.h"
+#include "thr_data.h"
+
 namespace LAMMPS_NS {
 
 // these macros hide some ugly and redundant OpenMP related stuff
 #if defined(_OPENMP)
 
 // make sure we have at least one page for each thread
-#define NEIGH_OMP_INIT                          \
-  const int nthreads = comm->nthreads;
+#define NEIGH_OMP_INIT                             \
+  const int nthreads = comm->nthreads;             \
+  const int ifix = modify->find_fix("package_omp")
 
 // get thread id and then assign each thread a fixed chunk of atoms
 #define NEIGH_OMP_SETUP(num)                    \
@@ -34,14 +40,18 @@ namespace LAMMPS_NS {
     const int idelta = 1 + num/nthreads;        \
     const int ifrom = tid*idelta;               \
     const int ito   = ((ifrom + idelta) > num)  \
-      ? num : (ifrom+idelta);
+      ? num : (ifrom+idelta);                   \
+    FixOMP *fix = static_cast<FixOMP *>(modify->fix[ifix]); \
+    ThrData *thr = fix->get_thr(tid);           \
+    thr->timer(Timer::START);
 
-#define NEIGH_OMP_CLOSE }
+#define NEIGH_OMP_CLOSE                         \
+      thr->timer(Timer::NEIGH);                 \
+    }
 
 #else /* !defined(_OPENMP) */
 
-#define NEIGH_OMP_INIT                          \
-  const int nthreads = comm->nthreads;
+#define NEIGH_OMP_INIT
 
 #define NEIGH_OMP_SETUP(num)                    \
   const int tid = 0;                            \
diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
index 41be063200..ec63dead89 100644
--- a/src/USER-OMP/pair_adp_omp.cpp
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -71,6 +71,7 @@ void PairADPOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (force->newton_pair)
@@ -91,6 +92,7 @@ void PairADPOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -200,6 +202,7 @@ void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr)
 
   if (NEWTON_PAIR) {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
     data_reduce_thr(&(mu[0][0]), nall, comm->nthreads, 3, tid);
     data_reduce_thr(&(lambda[0][0]), nall, comm->nthreads, 6, tid);
@@ -217,6 +220,7 @@ void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr)
 
   } else {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
     data_reduce_thr(&(mu[0][0]), nlocal, comm->nthreads, 3, tid);
     data_reduce_thr(&(lambda[0][0]), nlocal, comm->nthreads, 6, tid);
diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp
index 5907460b7f..66c56b859a 100644
--- a/src/USER-OMP/pair_airebo_omp.cpp
+++ b/src/USER-OMP/pair_airebo_omp.cpp
@@ -65,12 +65,14 @@ void PairAIREBOOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     FREBO_thr(ifrom,ito,evflag,eflag,vflag_atom,thr);
     if (ljflag) FLJ_thr(ifrom,ito,evflag,eflag,vflag_atom,thr);
     if (torflag) TORSION_thr(ifrom,ito,evflag,eflag,thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp
index d314ab3f1d..3e261aa2fd 100644
--- a/src/USER-OMP/pair_beck_omp.cpp
+++ b/src/USER-OMP/pair_beck_omp.cpp
@@ -54,6 +54,7 @@ void PairBeckOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairBeckOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp
index 099fadd98b..06e6da9d2c 100644
--- a/src/USER-OMP/pair_born_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_long_omp.cpp
@@ -60,6 +60,7 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -75,6 +76,7 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_born_coul_msm_omp.cpp b/src/USER-OMP/pair_born_coul_msm_omp.cpp
index 5ee4c1e376..bd282cfaa9 100755
--- a/src/USER-OMP/pair_born_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_msm_omp.cpp
@@ -57,6 +57,7 @@ void PairBornCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -72,6 +73,7 @@ void PairBornCoulMSMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_born_coul_wolf_omp.cpp b/src/USER-OMP/pair_born_coul_wolf_omp.cpp
index 7c06087179..9091332080 100644
--- a/src/USER-OMP/pair_born_coul_wolf_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_wolf_omp.cpp
@@ -54,6 +54,7 @@ void PairBornCoulWolfOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairBornCoulWolfOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp
index e17b2fbe94..01fdc1d012 100644
--- a/src/USER-OMP/pair_born_omp.cpp
+++ b/src/USER-OMP/pair_born_omp.cpp
@@ -52,6 +52,7 @@ void PairBornOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairBornOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_brownian_omp.cpp b/src/USER-OMP/pair_brownian_omp.cpp
index 15a2c25491..56d9dee308 100644
--- a/src/USER-OMP/pair_brownian_omp.cpp
+++ b/src/USER-OMP/pair_brownian_omp.cpp
@@ -132,6 +132,7 @@ void PairBrownianOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     // generate a random number generator instance for
@@ -157,6 +158,7 @@ void PairBrownianOMP::compute(int eflag, int vflag)
       }
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_brownian_poly_omp.cpp b/src/USER-OMP/pair_brownian_poly_omp.cpp
index 09a7ef2e59..0ac97a935b 100644
--- a/src/USER-OMP/pair_brownian_poly_omp.cpp
+++ b/src/USER-OMP/pair_brownian_poly_omp.cpp
@@ -132,6 +132,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     // generate a random number generator instance for
@@ -150,6 +151,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag)
       else eval<0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
index 9590fcea6d..054493cc01 100644
--- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -66,6 +67,7 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
       else eval<0,0,0>(ifrom, ito, thr);
     }
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp
index 6c52768ce7..6ab1a851b7 100644
--- a/src/USER-OMP/pair_buck_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp
@@ -60,6 +60,7 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -76,6 +77,7 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
     }
 
     // reduce per thread forces into global force array.
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_buck_coul_msm_omp.cpp b/src/USER-OMP/pair_buck_coul_msm_omp.cpp
index 129eca130b..bd59dccf7d 100755
--- a/src/USER-OMP/pair_buck_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_msm_omp.cpp
@@ -57,6 +57,7 @@ void PairBuckCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -73,6 +74,7 @@ void PairBuckCoulMSMOMP::compute(int eflag, int vflag)
     }
 
     // reduce per thread forces into global force array.
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
index 5f5570543b..2443b0a61d 100644
--- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp
@@ -64,6 +64,7 @@ void PairBuckLongCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -302,8 +303,9 @@ void PairBuckLongCoulLongOMP::compute(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -326,8 +328,10 @@ void PairBuckLongCoulLongOMP::compute_inner()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_inner(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -349,8 +353,10 @@ void PairBuckLongCoulLongOMP::compute_middle()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_middle(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -377,6 +383,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -615,8 +622,9 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp
index f4a014be69..0b8dbc7f68 100644
--- a/src/USER-OMP/pair_buck_omp.cpp
+++ b/src/USER-OMP/pair_buck_omp.cpp
@@ -52,6 +52,7 @@ void PairBuckOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairBuckOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp
index 828cf53251..cb1f201a9e 100644
--- a/src/USER-OMP/pair_cdeam_omp.cpp
+++ b/src/USER-OMP/pair_cdeam_omp.cpp
@@ -86,6 +86,7 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (force->newton_pair)
@@ -128,12 +129,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
       break;
 
     default:
+      {
 #if defined(_OPENMP)
 #pragma omp master
 #endif
-    error->all(FLERR,"unsupported eam/cd pair style variant");
+        error->all(FLERR,"unsupported eam/cd pair style variant");
+      }
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -227,6 +231,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 
   if (NEWTON_PAIR) {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nall, nthreads, 1, tid);
     data_reduce_thr(rhoB, nall, nthreads, 1, tid);
     if (CDEAMVERSION==1)
@@ -246,6 +251,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 
   } else {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nlocal, nthreads, 1, tid);
     data_reduce_thr(rhoB, nlocal, nthreads, 1, tid);
     if (CDEAMVERSION==1)
@@ -346,6 +352,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
     }
 
     if (NEWTON_PAIR) {
+    thr->timer(Timer::PAIR);
       data_reduce_thr(D_values, nall, nthreads, 1, tid);
 
       // wait until reduction is complete
@@ -361,6 +368,7 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
       sync_threads();
 
   } else {
+    thr->timer(Timer::PAIR);
       data_reduce_thr(D_values, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
@@ -410,7 +418,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
         D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]);
       } else if(CDEAMVERSION == 2) {
         D_i = D_values[i];
-      } else ASSERT(false);
+      } else {
+        ASSERT(false);
+      }
     }
 
     for(jj = 0; jj < jnum; jj++) {
@@ -456,8 +466,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
             D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]);
           } else if(CDEAMVERSION == 2) {
             D_j = D_values[j];
-          } else ASSERT(false);
-
+          } else {
+            ASSERT(false);
+          }
           double t2 = -rhoB[j];
           if(itype == speciesB) t2 += rho[j];
           fpair += D_j * rhoip * t2;
@@ -491,8 +502,9 @@ void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
             double x_ij = 0.5 * (x_i + x_j);
             // Calculate h(x_ij) polynomial function.
             h = evalH(x_ij);
-          } else ASSERT(false);
-
+          } else {
+            ASSERT(false);
+          }
           fpair += h * phip;
           phi *= h;
         }
diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp
index 7131d8c541..e3b6472c2a 100644
--- a/src/USER-OMP/pair_colloid_omp.cpp
+++ b/src/USER-OMP/pair_colloid_omp.cpp
@@ -55,6 +55,7 @@ void PairColloidOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -70,6 +71,7 @@ void PairColloidOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp
index 37237fb162..a075ee1f6a 100644
--- a/src/USER-OMP/pair_comb_omp.cpp
+++ b/src/USER-OMP/pair_comb_omp.cpp
@@ -61,6 +61,7 @@ void PairCombOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -73,6 +74,7 @@ void PairCombOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp
index db5c341f00..1478618682 100644
--- a/src/USER-OMP/pair_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_coul_cut_soft_omp.cpp
index cb0eb7ae6e..da6760902e 100644
--- a/src/USER-OMP/pair_coul_cut_soft_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_soft_omp.cpp
@@ -52,6 +52,7 @@ void PairCoulCutSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairCoulCutSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp
index d4a2538420..47d3dab99e 100644
--- a/src/USER-OMP/pair_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_coul_debye_omp.cpp
@@ -52,6 +52,7 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp
index 8a02be5cb3..b100c5053b 100644
--- a/src/USER-OMP/pair_coul_diel_omp.cpp
+++ b/src/USER-OMP/pair_coul_diel_omp.cpp
@@ -52,6 +52,7 @@ void PairCoulDielOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairCoulDielOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_dsf_omp.cpp b/src/USER-OMP/pair_coul_dsf_omp.cpp
index 846be4ceaa..c85ede20ee 100644
--- a/src/USER-OMP/pair_coul_dsf_omp.cpp
+++ b/src/USER-OMP/pair_coul_dsf_omp.cpp
@@ -62,6 +62,7 @@ void PairCoulDSFOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -77,6 +78,7 @@ void PairCoulDSFOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp
index 069fb4d186..d77a88198b 100644
--- a/src/USER-OMP/pair_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_coul_long_omp.cpp
@@ -61,6 +61,7 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -76,6 +77,7 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_long_soft_omp.cpp b/src/USER-OMP/pair_coul_long_soft_omp.cpp
index ef06bd323f..1246bb10ae 100644
--- a/src/USER-OMP/pair_coul_long_soft_omp.cpp
+++ b/src/USER-OMP/pair_coul_long_soft_omp.cpp
@@ -60,6 +60,7 @@ void PairCoulLongSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -75,6 +76,7 @@ void PairCoulLongSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_msm_omp.cpp b/src/USER-OMP/pair_coul_msm_omp.cpp
index 1af06208ef..870afb7806 100755
--- a/src/USER-OMP/pair_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_coul_msm_omp.cpp
@@ -58,6 +58,7 @@ void PairCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -73,6 +74,7 @@ void PairCoulMSMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_wolf_omp.cpp b/src/USER-OMP/pair_coul_wolf_omp.cpp
index a9ad976e4b..bcc19af0fe 100644
--- a/src/USER-OMP/pair_coul_wolf_omp.cpp
+++ b/src/USER-OMP/pair_coul_wolf_omp.cpp
@@ -54,6 +54,7 @@ void PairCoulWolfOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairCoulWolfOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp
index d09a2fbdae..070c2dc4c2 100644
--- a/src/USER-OMP/pair_dpd_omp.cpp
+++ b/src/USER-OMP/pair_dpd_omp.cpp
@@ -77,6 +77,7 @@ void PairDPDOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     // generate a random number generator instance for
@@ -98,6 +99,7 @@ void PairDPDOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp
index 26d14ec8aa..fc725e8cf5 100644
--- a/src/USER-OMP/pair_dpd_tstat_omp.cpp
+++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp
@@ -77,6 +77,7 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     // generate a random number generator instance for
@@ -98,6 +99,7 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp
index 808353b881..cff4daf5ea 100644
--- a/src/USER-OMP/pair_eam_omp.cpp
+++ b/src/USER-OMP/pair_eam_omp.cpp
@@ -66,6 +66,7 @@ void PairEAMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (force->newton_pair)
@@ -86,6 +87,7 @@ void PairEAMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -162,6 +164,7 @@ void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 
   if (NEWTON_PAIR) {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
@@ -176,6 +179,7 @@ void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
     sync_threads();
 
   } else {
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp
index 50ebaae31a..f86d8ebcbc 100644
--- a/src/USER-OMP/pair_edip_omp.cpp
+++ b/src/USER-OMP/pair_edip_omp.cpp
@@ -59,6 +59,7 @@ void PairEDIPOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -71,6 +72,7 @@ void PairEDIPOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp
index 916bc99380..501c282f79 100644
--- a/src/USER-OMP/pair_eim_omp.cpp
+++ b/src/USER-OMP/pair_eim_omp.cpp
@@ -66,6 +66,7 @@ void PairEIMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (force->newton_pair)
@@ -86,6 +87,7 @@ void PairEIMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -162,6 +164,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
   // communicate and sum densities
   if (NEWTON_PAIR) {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
@@ -176,6 +179,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
     }
 
   } else {
+    thr->timer(Timer::PAIR);
     data_reduce_thr(rho, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
@@ -234,6 +238,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
   // communicate and sum modified densities
   if (NEWTON_PAIR) {
     // reduce per thread density
+    thr->timer(Timer::PAIR);
     data_reduce_thr(fp, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
@@ -248,6 +253,7 @@ void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
     }
 
   } else {
+    thr->timer(Timer::PAIR);
     data_reduce_thr(fp, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp
index cb0197c965..e80768d5af 100644
--- a/src/USER-OMP/pair_gauss_cut_omp.cpp
+++ b/src/USER-OMP/pair_gauss_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairGaussCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairGaussCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp
index 5255b6ea3d..4d8b477465 100644
--- a/src/USER-OMP/pair_gauss_omp.cpp
+++ b/src/USER-OMP/pair_gauss_omp.cpp
@@ -54,6 +54,7 @@ void PairGaussOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairGaussOMP::compute(int eflag, int vflag)
       else occ = eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp
index ed5cc92701..031b1d4ddd 100644
--- a/src/USER-OMP/pair_gayberne_omp.cpp
+++ b/src/USER-OMP/pair_gayberne_omp.cpp
@@ -54,6 +54,7 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
index e6c0063e3a..d684fde19d 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
@@ -77,6 +77,7 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
@@ -86,6 +87,7 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
       if (shearupdate) eval<0,1>(ifrom, ito, thr);
       else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
index 5a229afc64..3da16dea16 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
@@ -79,6 +79,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
@@ -88,6 +89,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
       if (shearupdate) eval<0,1>(ifrom, ito, thr);
       else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -95,7 +97,7 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
 template <int EVFLAG, int SHEARUPDATE>
 void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
+  int i,j,ii,jj,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
   double myshear[3];
   double radi,radj,radsum,rsq,r,rinv,rsqinv;
diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp
index c99dfe6c27..c4e792708f 100644
--- a/src/USER-OMP/pair_gran_hooke_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_omp.cpp
@@ -74,6 +74,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
@@ -83,6 +84,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,1>(ifrom, ito, thr);
       else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -90,7 +92,7 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
 template <int EVFLAG, int NEWTON_PAIR>
 void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
+  int i,j,ii,jj,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
   double radi,radj,radsum,rsq,r,rinv,rsqinv;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
index 9dd82ae253..26c456c170 100644
--- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
@@ -83,6 +83,7 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -98,6 +99,7 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
index 9efb5d63d2..557852fb2d 100644
--- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
@@ -83,6 +83,7 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -98,6 +99,7 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
diff --git a/src/USER-OMP/pair_line_lj_omp.cpp b/src/USER-OMP/pair_line_lj_omp.cpp
index 27c630166e..8a9cf0b0d2 100644
--- a/src/USER-OMP/pair_line_lj_omp.cpp
+++ b/src/USER-OMP/pair_line_lj_omp.cpp
@@ -79,6 +79,7 @@ void PairLineLJOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -94,6 +95,7 @@ void PairLineLJOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp
index 13a2252298..451d876a48 100644
--- a/src/USER-OMP/pair_lj96_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj96_cut_omp.cpp
@@ -53,6 +53,7 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -68,6 +69,7 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
index e30052ddb2..b8f9d68807 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
index 8aff427b74..35f3867c9d 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
index 41fd5e74cc..caaeb8f916 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
@@ -53,6 +53,7 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -68,6 +69,7 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp
index 9015161359..a1ccbffab4 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_soft_omp.cpp
@@ -53,6 +53,7 @@ void PairLJCharmmCoulLongSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -68,6 +69,7 @@ void PairLJCharmmCoulLongSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -127,7 +129,6 @@ void PairLJCharmmCoulLongSoftOMP::eval(int iifrom, int iito, ThrData * const thr
       const int jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-        const double r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           const double A1 =  0.254829592;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp
index f13515283a..2fcfe213a4 100755
--- a/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_msm_omp.cpp
@@ -58,6 +58,7 @@ void PairLJCharmmCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -73,6 +74,7 @@ void PairLJCharmmCoulMSMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -134,9 +136,9 @@ void PairLJCharmmCoulMSMOMP::eval(int iifrom, int iito, ThrData * const thr)
 
             const double r = sqrt(rsq);
             const double prefactor = qqrd2e * qtmp*q[j]/r;
-            const double egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul); 
-            const double fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul); 
-            forcecoul = prefactor * (fgamma - 1.0); 
+            const double egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
+            const double fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul);
+            forcecoul = prefactor * (fgamma - 1.0);
 
             if (EFLAG) ecoul = prefactor*egamma;
             if (sbindex) {
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
index e51026ecb0..1ba825ab8e 100644
--- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
index 08efafa324..6b535a59a5 100644
--- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
@@ -60,6 +60,7 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -75,6 +76,7 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp
index a2b88699e8..8c920ab9a8 100644
--- a/src/USER-OMP/pair_lj_class2_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_omp.cpp
@@ -52,6 +52,7 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp
index 124bc87899..2caf4620a3 100644
--- a/src/USER-OMP/pair_lj_cubic_omp.cpp
+++ b/src/USER-OMP/pair_lj_cubic_omp.cpp
@@ -53,6 +53,7 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -68,6 +69,7 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
index 3cbb2977c0..49c7ba128b 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp
index ba4be2aca0..f6db06a51c 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_soft_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCutCoulCutSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCutCoulCutSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
index 28745e89c7..405755ecbb 100644
--- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp
index c7ffc8ef89..f33ea97e9c 100644
--- a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp
@@ -62,6 +62,7 @@ void PairLJCutCoulDSFOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -77,6 +78,7 @@ void PairLJCutCoulDSFOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -156,7 +158,6 @@ void PairLJCutCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr)
           forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + 
                                    r*f_shift) * r;
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
-          fpair = forcecoul * r2inv;
         } else forcecoul = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
index 6061374bba..842b84270c 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
@@ -61,6 +61,7 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -76,6 +77,7 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp
index d23ba2fa5c..cdfd4a4c2d 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_soft_omp.cpp
@@ -61,6 +61,7 @@ void PairLJCutCoulLongSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -76,6 +77,7 @@ void PairLJCutCoulLongSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp
index c355d4aff2..4b53b7784b 100755
--- a/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_msm_omp.cpp
@@ -58,6 +58,7 @@ void PairLJCutCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -73,6 +74,7 @@ void PairLJCutCoulMSMOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp
index c7ec43af52..8c22961c1e 100755
--- a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairLJCutDipoleCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCutDipoleCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp
index 4d9f6af81f..5804e41119 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_omp.cpp
@@ -53,6 +53,7 @@ void PairLJCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,7 +68,7 @@ void PairLJCutOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
       else eval<0,0,0>(ifrom, ito, thr);
     }
-
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_soft_omp.cpp
index 953ebea366..92f0cc339f 100644
--- a/src/USER-OMP/pair_lj_cut_soft_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_soft_omp.cpp
@@ -53,6 +53,7 @@ void PairLJCutSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJCutSoftOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
       else eval<0,0,0>(ifrom, ito, thr);
     }
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp
index 92ab9598b6..5bc0165120 100644
--- a/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_tip4p_cut_omp.cpp
@@ -52,7 +52,7 @@ PairLJCutTIP4PCutOMP::PairLJCutTIP4PCutOMP(LAMMPS *lmp) :
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutTIP4PCutOMP::~PairLJCutTIP4PCutOMP() 
+PairLJCutTIP4PCutOMP::~PairLJCutTIP4PCutOMP()
 {
   memory->destroy(hneigh_thr);
   memory->destroy(newsite_thr);
@@ -101,6 +101,7 @@ void PairLJCutTIP4PCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -113,6 +114,7 @@ void PairLJCutTIP4PCutOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp
index 5dfd75421f..3646c0a2e8 100644
--- a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp
@@ -52,7 +52,7 @@ PairLJCutTIP4PLongOMP::PairLJCutTIP4PLongOMP(LAMMPS *lmp) :
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutTIP4PLongOMP::~PairLJCutTIP4PLongOMP() 
+PairLJCutTIP4PLongOMP::~PairLJCutTIP4PLongOMP()
 {
   memory->destroy(hneigh_thr);
   memory->destroy(newsite_thr);
@@ -101,6 +101,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
   if (!ncoultablebits) {
@@ -125,6 +126,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag)
     } else eval<0,0,0,0>(ifrom, ito, thr);
   }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp
index f31032788f..047b42775d 100644
--- a/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_tip4p_long_soft_omp.cpp
@@ -101,6 +101,7 @@ void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -113,6 +114,7 @@ void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp
index 8f1f931822..187cc5f78d 100644
--- a/src/USER-OMP/pair_lj_expand_omp.cpp
+++ b/src/USER-OMP/pair_lj_expand_omp.cpp
@@ -52,6 +52,7 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
index 61fb667a69..3e3fbbc9dd 100644
--- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
@@ -52,6 +52,7 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp
index 964a9bda8d..618e20ab66 100644
--- a/src/USER-OMP/pair_lj_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp
@@ -52,6 +52,7 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
index aafe9a3ab2..23a2bc3cc4 100644
--- a/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_long_coul_long_omp.cpp
@@ -64,6 +64,7 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -302,8 +303,9 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -324,8 +326,10 @@ void PairLJLongCoulLongOMP::compute_inner()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_inner(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -347,8 +351,10 @@ void PairLJLongCoulLongOMP::compute_middle()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_middle(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -375,6 +381,7 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -613,8 +620,9 @@ void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -979,7 +987,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
 {
   double evdwl,ecoul,fvirial,fpair;
   evdwl = ecoul = 0.0;
-  
+
   const double * const * const x = atom->x;
   double * const * const f = thr->get_f();
   const double * const q = atom->q;
@@ -993,7 +1001,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
   double *f0 = f[0], *fi = f0;
 
   int *ilist = listouter->ilist;
-  
+
   int i, j, ii;
   int *jneigh, *jneighn, typei, typej, ni, respa_flag;
   double qi = 0.0, qri = 0.0;
@@ -1002,16 +1010,16 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   double respa_lj = 0.0, respa_coul = 0.0, frespa = 0.0;
   vector xi, d;
-  
+
   const double cut_in_off = cut_respa[2];
   const double cut_in_on = cut_respa[3];
-  
+
   const double cut_in_diff = cut_in_on - cut_in_off;
   const double cut_in_off_sq = cut_in_off*cut_in_off;
   const double cut_in_on_sq = cut_in_on*cut_in_on;
-  
+
   //ineighn = (ineigh = list->ilist)+list->inum;
-  
+
   for (ii = iiform; ii < iito; ++ii) {                        // loop over my atoms
     i = ilist[ii]; fi = f0+3*i;
     if (ORDER1) qri = (qi = q[i])*qqrd2e;                // initialize constants
@@ -1020,20 +1028,20 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
-    
+
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
-      
+
       { register const double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
-      
+
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
-      
+
       frespa = 1.0;                                       // check whether and how to compute respa corrections
       respa_coul = 0;
       respa_lj = 0;
@@ -1042,7 +1050,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
         register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff;
         frespa = 1-rsw*rsw*(3.0-2.0*rsw);
       }
-      
+
       if (ORDER1 && (rsq < cut_coulsq)) {                // coulombic
         if (!CTABLE || rsq <= tabinnersq) {        // series real space
           register double r = sqrt(rsq), s = qri*q[j];
@@ -1083,7 +1091,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
           }
         }
       }
-       
+
       else force_coul = respa_coul = ecoul = 0.0;
 
       if (rsq < cut_ljsqi[typej]) {                        // lennard-jones
@@ -1139,9 +1147,9 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
         }
       }
       else force_lj = respa_lj = evdwl = 0.0;
-      
+
       fpair = (force_coul+force_lj)*r2inv;
-      
+
       if (NEWTON_PAIR || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
@@ -1153,7 +1161,7 @@ void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
-      
+
       if (EVFLAG) {
         fvirial = (force_coul + force_lj + respa_coul + respa_lj)*r2inv;
         ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.h b/src/USER-OMP/pair_lj_long_coul_long_omp.h
index f10cbfd510..dd47231a9c 100644
--- a/src/USER-OMP/pair_lj_long_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_long_coul_long_omp.h
@@ -55,7 +55,7 @@ class PairLJLongCoulLongOMP : public PairLJLongCoulLong, public ThrOMP {
   void eval_inner(int, int, ThrData *const);
   void eval_middle(int, int, ThrData *const);
 
-  
+
 
 };
 
diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
index cace817fe5..4c86ce7d33 100644
--- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp
@@ -104,6 +104,7 @@ void PairLJLongTIP4PLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -342,8 +343,9 @@ void PairLJLongTIP4PLongOMP::compute(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -385,8 +387,10 @@ void PairLJLongTIP4PLongOMP::compute_inner()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_inner(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -408,8 +412,10 @@ void PairLJLongTIP4PLongOMP::compute_middle()
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(0, 0, nall, 0, 0, thr);
     eval_middle(ifrom, ito, thr);
+    thr->timer(Timer::PAIR);
 
   }  // end of omp parallel region
 }
@@ -460,6 +466,7 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (order6) {
@@ -698,8 +705,9 @@ void PairLJLongTIP4PLongOMP::compute_outer(int eflag, int vflag)
           }
         }
       }
-    } 
+    }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.h b/src/USER-OMP/pair_lj_long_tip4p_long_omp.h
index af0e3053b4..171edb6ddd 100644
--- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.h
+++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.h
@@ -62,7 +62,7 @@ class PairLJLongTIP4PLongOMP : public PairLJLongTIP4PLong, public ThrOMP {
   void compute_newsite_thr(const dbl3_t &, const dbl3_t &,
                            const dbl3_t &, dbl3_t &) const;
 
-  
+
 
 };
 
diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
index 2c85f0f2e0..c86a1f6c0e 100644
--- a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
@@ -54,6 +54,7 @@ void PairLJSDKCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairLJSDKCoulLongOMP::compute(int eflag, int vflag)
       else eval_thr<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp
index 62d1ae56fd..95013932e8 100644
--- a/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp
+++ b/src/USER-OMP/pair_lj_sdk_coul_msm_omp.cpp
@@ -60,6 +60,7 @@ void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -75,6 +76,7 @@ void PairLJSDKCoulMSMOMP::compute(int eflag, int vflag)
       else eval_msm_thr<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp
index da2eb492ff..ca32528ee0 100644
--- a/src/USER-OMP/pair_lj_sdk_omp.cpp
+++ b/src/USER-OMP/pair_lj_sdk_omp.cpp
@@ -56,6 +56,7 @@ void PairLJSDKOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -71,6 +72,7 @@ void PairLJSDKOMP::compute(int eflag, int vflag)
       else eval_thr<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp
index dbaf1a9639..572c775467 100755
--- a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp
+++ b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp
@@ -52,6 +52,7 @@ void PairLJSFDipoleSFOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJSFDipoleSFOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp
index 82595c7578..c1bdbcc93e 100644
--- a/src/USER-OMP/pair_lj_sf_omp.cpp
+++ b/src/USER-OMP/pair_lj_sf_omp.cpp
@@ -52,6 +52,7 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp
index 7a8440fa58..6ad61b0dbf 100644
--- a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp
+++ b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp
@@ -52,6 +52,7 @@ void PairLJSmoothLinearOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJSmoothLinearOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp
index b38ff2311b..d2eee11102 100644
--- a/src/USER-OMP/pair_lj_smooth_omp.cpp
+++ b/src/USER-OMP/pair_lj_smooth_omp.cpp
@@ -52,6 +52,7 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp
index 877be4bed2..3da80c8c5a 100644
--- a/src/USER-OMP/pair_lubricate_omp.cpp
+++ b/src/USER-OMP/pair_lubricate_omp.cpp
@@ -122,6 +122,7 @@ void PairLubricateOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (flaglog) {
@@ -142,6 +143,7 @@ void PairLubricateOMP::compute(int eflag, int vflag)
       }
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_lubricate_poly_omp.cpp b/src/USER-OMP/pair_lubricate_poly_omp.cpp
index a9161805b6..abe27b7434 100644
--- a/src/USER-OMP/pair_lubricate_poly_omp.cpp
+++ b/src/USER-OMP/pair_lubricate_poly_omp.cpp
@@ -120,6 +120,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (flaglog) {
@@ -144,6 +145,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag)
       }
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp
index 69eeb1e120..ebe576e59b 100644
--- a/src/USER-OMP/pair_meam_spline_omp.cpp
+++ b/src/USER-OMP/pair_meam_spline_omp.cpp
@@ -66,6 +66,7 @@ void PairMEAMSplineOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     thr->init_eam(nall,Uprime_values);
@@ -80,6 +81,7 @@ void PairMEAMSplineOMP::compute(int eflag, int vflag)
       eval<0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -250,6 +252,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
   sync_threads();
 
   // reduce per thread density
+    thr->timer(Timer::PAIR);
   data_reduce_thr(Uprime_values, nall, nthreads, 1, tid);
 
   // wait until reduction is complete so that master thread
diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp
index 448b265de7..b27829d897 100644
--- a/src/USER-OMP/pair_morse_omp.cpp
+++ b/src/USER-OMP/pair_morse_omp.cpp
@@ -52,6 +52,7 @@ void PairMorseOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairMorseOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_nb3b_harmonic_omp.cpp b/src/USER-OMP/pair_nb3b_harmonic_omp.cpp
index 1877b26fbc..b36c4dd77d 100644
--- a/src/USER-OMP/pair_nb3b_harmonic_omp.cpp
+++ b/src/USER-OMP/pair_nb3b_harmonic_omp.cpp
@@ -52,6 +52,7 @@ void PairNb3bHarmonicOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -62,6 +63,7 @@ void PairNb3bHarmonicOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp
index b3fafc1693..f4fa780c60 100644
--- a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairNMCutCoulCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairNMCutCoulCutOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp
index b198693680..c5e7ebd621 100644
--- a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp
@@ -60,6 +60,7 @@ void PairNMCutCoulLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -75,6 +76,7 @@ void PairNMCutCoulLongOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_nm_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_omp.cpp
index 27f1d46931..a988633acf 100644
--- a/src/USER-OMP/pair_nm_cut_omp.cpp
+++ b/src/USER-OMP/pair_nm_cut_omp.cpp
@@ -52,6 +52,7 @@ void PairNMCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -66,6 +67,7 @@ void PairNMCutOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
       else eval<0,0,0>(ifrom, ito, thr);
     }
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp
index 640369ac03..92b7037127 100644
--- a/src/USER-OMP/pair_peri_lps_omp.cpp
+++ b/src/USER-OMP/pair_peri_lps_omp.cpp
@@ -71,6 +71,7 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -86,6 +87,7 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp
index 86d3cc20f2..8895b3ba6c 100644
--- a/src/USER-OMP/pair_peri_pmb_omp.cpp
+++ b/src/USER-OMP/pair_peri_pmb_omp.cpp
@@ -67,6 +67,7 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -82,6 +83,7 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp
index 4e3bfded47..c38070304d 100644
--- a/src/USER-OMP/pair_resquared_omp.cpp
+++ b/src/USER-OMP/pair_resquared_omp.cpp
@@ -54,6 +54,7 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -69,6 +70,7 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp
index f262e1d411..51cac9106b 100644
--- a/src/USER-OMP/pair_soft_omp.cpp
+++ b/src/USER-OMP/pair_soft_omp.cpp
@@ -56,6 +56,7 @@ void PairSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -71,6 +72,7 @@ void PairSoftOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp
index 1506359a7a..c90424e735 100644
--- a/src/USER-OMP/pair_sw_omp.cpp
+++ b/src/USER-OMP/pair_sw_omp.cpp
@@ -52,6 +52,7 @@ void PairSWOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -62,6 +63,7 @@ void PairSWOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp
index 9e1340b300..f8c6a9f186 100644
--- a/src/USER-OMP/pair_table_omp.cpp
+++ b/src/USER-OMP/pair_table_omp.cpp
@@ -53,6 +53,7 @@ void PairTableOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -68,6 +69,7 @@ void PairTableOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tersoff_mod_omp.cpp b/src/USER-OMP/pair_tersoff_mod_omp.cpp
index 02e4962567..907bb9e078 100644
--- a/src/USER-OMP/pair_tersoff_mod_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_mod_omp.cpp
@@ -52,6 +52,7 @@ void PairTersoffMODOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -64,6 +65,7 @@ void PairTersoffMODOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp
index 8d89caa5e3..ff0f41b282 100644
--- a/src/USER-OMP/pair_tersoff_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_omp.cpp
@@ -52,6 +52,7 @@ void PairTersoffOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -64,6 +65,7 @@ void PairTersoffOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp
index 86d8cdecc0..bd786c7ca9 100644
--- a/src/USER-OMP/pair_tersoff_table_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_table_omp.cpp
@@ -77,6 +77,7 @@ void PairTersoffTableOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
@@ -84,6 +85,7 @@ void PairTersoffTableOMP::compute(int eflag, int vflag)
       else eval<1,0>(ifrom, ito, thr);
     else eval<0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tip4p_cut_omp.cpp b/src/USER-OMP/pair_tip4p_cut_omp.cpp
index d9f75cba9a..72c582acb8 100644
--- a/src/USER-OMP/pair_tip4p_cut_omp.cpp
+++ b/src/USER-OMP/pair_tip4p_cut_omp.cpp
@@ -52,7 +52,7 @@ PairTIP4PCutOMP::PairTIP4PCutOMP(LAMMPS *lmp) :
 
 /* ---------------------------------------------------------------------- */
 
-PairTIP4PCutOMP::~PairTIP4PCutOMP() 
+PairTIP4PCutOMP::~PairTIP4PCutOMP()
 {
   memory->destroy(hneigh_thr);
   memory->destroy(newsite_thr);
@@ -100,6 +100,7 @@ void PairTIP4PCutOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -112,6 +113,7 @@ void PairTIP4PCutOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tip4p_long_omp.cpp b/src/USER-OMP/pair_tip4p_long_omp.cpp
index fee669e53b..3476ed5928 100644
--- a/src/USER-OMP/pair_tip4p_long_omp.cpp
+++ b/src/USER-OMP/pair_tip4p_long_omp.cpp
@@ -52,7 +52,7 @@ PairTIP4PLongOMP::PairTIP4PLongOMP(LAMMPS *lmp) :
 
 /* ---------------------------------------------------------------------- */
 
-PairTIP4PLongOMP::~PairTIP4PLongOMP() 
+PairTIP4PLongOMP::~PairTIP4PLongOMP()
 {
   memory->destroy(hneigh_thr);
   memory->destroy(newsite_thr);
@@ -101,6 +101,7 @@ void PairTIP4PLongOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
   if (!ncoultablebits) {
@@ -125,6 +126,7 @@ void PairTIP4PLongOMP::compute(int eflag, int vflag)
     } else eval<0,0,0,0>(ifrom, ito, thr);
   }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp
index 6d76e638b9..f693e0a06a 100644
--- a/src/USER-OMP/pair_tip4p_long_soft_omp.cpp
+++ b/src/USER-OMP/pair_tip4p_long_soft_omp.cpp
@@ -101,6 +101,7 @@ void PairTIP4PLongSoftOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -113,6 +114,7 @@ void PairTIP4PLongSoftOMP::compute(int eflag, int vflag)
       }
     } else eval<0,0,0>(ifrom, ito, thr);
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_tri_lj_omp.cpp b/src/USER-OMP/pair_tri_lj_omp.cpp
index 16dce231ba..3e1e1fe93f 100644
--- a/src/USER-OMP/pair_tri_lj_omp.cpp
+++ b/src/USER-OMP/pair_tri_lj_omp.cpp
@@ -93,6 +93,7 @@ void PairTriLJOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -108,6 +109,7 @@ void PairTriLJOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
index 36b5d82c0b..c3c73fab58 100644
--- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
@@ -52,6 +52,7 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp
index ba2345983c..9bb0dea9c3 100644
--- a/src/USER-OMP/pair_yukawa_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_omp.cpp
@@ -52,6 +52,7 @@ void PairYukawaOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairYukawaOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_zbl_omp.cpp b/src/USER-OMP/pair_zbl_omp.cpp
index 454934f679..281ee52acb 100644
--- a/src/USER-OMP/pair_zbl_omp.cpp
+++ b/src/USER-OMP/pair_zbl_omp.cpp
@@ -53,6 +53,7 @@ void PairZBLOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
@@ -67,6 +68,7 @@ void PairZBLOMP::compute(int eflag, int vflag)
       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
       else eval<0,0,0>(ifrom, ito, thr);
     }
+    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp
index 3a0c3f5806..021765d14b 100644
--- a/src/USER-OMP/pppm_cg_omp.cpp
+++ b/src/USER-OMP/pppm_cg_omp.cpp
@@ -138,6 +138,8 @@ void PPPMCGOMP::compute_gf_ik()
     int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
       m = n / (numl*numk);
@@ -190,6 +192,7 @@ void PPPMCGOMP::compute_gf_ik()
 	greensfn[n] = numerator*sum1/denominator;
       } else greensfn[n] = 0.0;
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -226,6 +229,8 @@ void PPPMCGOMP::compute_gf_ad()
     int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
 
@@ -279,8 +284,9 @@ void PPPMCGOMP::compute_gf_ad()
 	sf5 += sf_precoeff6[n]*greensfn[n];
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of paralle region
-  
+
   // compute the coefficients for the self-force correction
 
   double prex, prey, prez, tmp[6];
@@ -321,6 +327,7 @@ void PPPMCGOMP::compute(int eflag, int vflag)
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -365,6 +372,7 @@ void PPPMCGOMP::make_rho()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -380,7 +388,7 @@ void PPPMCGOMP::make_rho()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
            || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
@@ -413,6 +421,7 @@ void PPPMCGOMP::make_rho()
         }
       }
     }
+    thr->timer(Timer::KSPACE);
   }
 }
 
@@ -448,6 +457,7 @@ void PPPMCGOMP::fieldforce_ik()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
@@ -487,6 +497,7 @@ void PPPMCGOMP::fieldforce_ik()
       f[i].y += qfactor*eky;
       if (slabflag != 2) f[i].z += qfactor*ekz;
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -528,6 +539,7 @@ void PPPMCGOMP::fieldforce_ad()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
     FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
@@ -585,6 +597,7 @@ void PPPMCGOMP::fieldforce_ad()
       sf *= 2*qi;
       if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -619,6 +632,7 @@ void PPPMCGOMP::fieldforce_peratom()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     for (int j=ifrom; j < ito; ++j) {
@@ -667,6 +681,7 @@ void PPPMCGOMP::fieldforce_peratom()
         vatom[i][5] += qi*v5;
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp
index b22b553341..f3692b287a 100644
--- a/src/USER-OMP/pppm_disp_omp.cpp
+++ b/src/USER-OMP/pppm_disp_omp.cpp
@@ -74,7 +74,7 @@ void PPPMDispOMP::allocate()
     }
     if (function[1] + function[2]) {
       ThrData * thr = fix->get_thr(tid);
-      thr->init_pppm_disp(order_6,memory);  
+      thr->init_pppm_disp(order_6,memory);
     }
   }
 }
@@ -142,6 +142,8 @@ void PPPMDispOMP::compute_gf()
     const int nny = nyhi_fft-nylo_fft+1;
 
     loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (m = nzlo_fft; m <= nzhi_fft; m++) {
       mper = m - nz_pppm*(2*m/nz_pppm);
@@ -185,13 +187,14 @@ void PPPMDispOMP::compute_gf()
 
           if (sqk != 0.0) {
             numerator = 4.0*MY_PI/sqk;
-            denominator = gf_denom(snx2,sny2,snz2, gf_b, order);  
+            denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
             greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator;
           } else greensfn[nn] = 0.0;
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -241,6 +244,8 @@ void PPPMDispOMP::compute_gf_6()
     const int nny = nyhi_fft_6-nylo_fft_6+1;
 
     loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
       mper = m - nz_pppm_6*(2*m/nz_pppm_6);
@@ -252,7 +257,7 @@ void PPPMDispOMP::compute_gf_6()
       argz = 0.5*qz*zprd_slab/nz_pppm_6;
       if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
       wz *= wz;
-              
+
       for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
         lper = l - ny_pppm_6*(2*l/ny_pppm_6);
         qy = unitky*lper;
@@ -279,11 +284,11 @@ void PPPMDispOMP::compute_gf_6()
 	  argx = 0.5*qx*xprd/nx_pppm_6;
 	  if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
           wx *= wx;
-      
+
 	  sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
 
           if (sqk != 0.0) {
-	    denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); 
+	    denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
 	    rtsqk = sqrt(sqk);
             term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
                     2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
@@ -292,7 +297,8 @@ void PPPMDispOMP::compute_gf_6()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 /* ----------------------------------------------------------------------
    run the regular toplevel compute method from plain PPPPM
@@ -315,6 +321,7 @@ void PPPMDispOMP::compute(int eflag, int vflag)
 #endif
 
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -429,6 +436,7 @@ void PPPMDispOMP::make_rho_c()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -442,7 +450,7 @@ void PPPMDispOMP::make_rho_c()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
            || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
@@ -475,7 +483,8 @@ void PPPMDispOMP::make_rho_c()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 
@@ -516,6 +525,7 @@ void PPPMDispOMP::make_rho_g()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -529,7 +539,7 @@ void PPPMDispOMP::make_rho_g()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
            || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
@@ -564,7 +574,8 @@ void PPPMDispOMP::make_rho_g()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 
@@ -618,6 +629,7 @@ void PPPMDispOMP::make_rho_a()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -631,7 +643,7 @@ void PPPMDispOMP::make_rho_a()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
            || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;
@@ -681,7 +693,8 @@ void PPPMDispOMP::make_rho_a()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 
@@ -726,6 +739,7 @@ void PPPMDispOMP::fieldforce_c_ik()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
@@ -770,7 +784,8 @@ void PPPMDispOMP::fieldforce_c_ik()
         f[i][2] += qfactor*ekz;
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -829,6 +844,7 @@ void PPPMDispOMP::fieldforce_c_ad()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
@@ -893,7 +909,8 @@ void PPPMDispOMP::fieldforce_c_ad()
         if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -934,6 +951,7 @@ void PPPMDispOMP::fieldforce_c_peratom()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
@@ -989,7 +1007,8 @@ void PPPMDispOMP::fieldforce_c_peratom()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1031,7 +1050,8 @@ void PPPMDispOMP::fieldforce_g_ik()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
@@ -1078,7 +1098,8 @@ void PPPMDispOMP::fieldforce_g_ik()
         f[i][2] += lj*ekz;
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1133,6 +1154,7 @@ void PPPMDispOMP::fieldforce_g_ad()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
@@ -1200,7 +1222,8 @@ void PPPMDispOMP::fieldforce_g_ad()
         if (slabflag != 2) f[i][2] += ekz*lj - sf;
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1241,6 +1264,7 @@ void PPPMDispOMP::fieldforce_g_peratom()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
@@ -1299,7 +1323,8 @@ void PPPMDispOMP::fieldforce_g_peratom()
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1341,6 +1366,7 @@ void PPPMDispOMP::fieldforce_a_ik()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
@@ -1420,7 +1446,8 @@ void PPPMDispOMP::fieldforce_a_ik()
         f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1475,6 +1502,7 @@ void PPPMDispOMP::fieldforce_a_ad()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     double * const * const f = thr->get_f();
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
     FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());
@@ -1609,7 +1637,8 @@ void PPPMDispOMP::fieldforce_a_ad()
         if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
@@ -1650,6 +1679,7 @@ void PPPMDispOMP::fieldforce_a_peratom()
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
 
     int l,m,n,nx,ny,nz,mx,my,mz;
@@ -1761,25 +1791,26 @@ void PPPMDispOMP::fieldforce_a_peratom()
         lj6 = B[7*type]*0.5;
 
         if (eflag_atom)
-          eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + 
+          eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 +
             u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6;
         if (vflag_atom) {
-          vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + 
+          vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
             v04*lj4 + v05*lj5 + v06*lj6;
-          vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + 
+          vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
             v14*lj4 + v15*lj5 + v16*lj6;
-          vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + 
+          vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
             v24*lj4 + v25*lj5 + v26*lj6;
-          vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + 
+          vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
             v34*lj4 + v35*lj5 + v36*lj6;
-          vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + 
+          vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
             v44*lj4 + v45*lj5 + v46*lj6;
-          vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + 
+          vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
             v54*lj4 + v55*lj5 + v56*lj6;
         }
       }
     }
-  }
+    thr->timer(Timer::KSPACE);
+  } // end of parallel region
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/USER-OMP/pppm_disp_omp.h b/src/USER-OMP/pppm_disp_omp.h
index e2f588d169..060f269471 100644
--- a/src/USER-OMP/pppm_disp_omp.h
+++ b/src/USER-OMP/pppm_disp_omp.h
@@ -41,7 +41,7 @@ namespace LAMMPS_NS {
   virtual void particle_map(double,double,double,
                             double,int**,int,int,
                             int,int,int,int,int,int);
-                                
+
 
   virtual void fieldforce_c_ik();
   virtual void fieldforce_c_ad();
diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp
index bcda74e92a..a62199be56 100644
--- a/src/USER-OMP/pppm_omp.cpp
+++ b/src/USER-OMP/pppm_omp.cpp
@@ -135,6 +135,8 @@ void PPPMOMP::compute_gf_ik()
     int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
       m = n / (numl*numk);
@@ -187,6 +189,7 @@ void PPPMOMP::compute_gf_ik()
         greensfn[n] = numerator*sum1/denominator;
       } else greensfn[n] = 0.0;
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -223,6 +226,8 @@ void PPPMOMP::compute_gf_ad()
     int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
 
@@ -276,8 +281,9 @@ void PPPMOMP::compute_gf_ad()
         sf5 += sf_precoeff6[n]*greensfn[n];
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of paralle region
-  
+
   // compute the coefficients for the self-force correction
 
   double prex, prey, prez, tmp[6];
@@ -318,6 +324,7 @@ void PPPMOMP::compute(int eflag, int vflag)
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -363,6 +370,7 @@ void PPPMOMP::make_rho()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -376,7 +384,7 @@ void PPPMOMP::make_rho()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
            || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
@@ -409,6 +417,7 @@ void PPPMOMP::make_rho()
         }
       }
     }
+    thr->timer(Timer::KSPACE);
   }
 }
 
@@ -451,6 +460,7 @@ void PPPMOMP::fieldforce_ik()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
@@ -488,6 +498,7 @@ void PPPMOMP::fieldforce_ik()
       f[i].y += qfactor*eky;
       if (slabflag != 2) f[i].z += qfactor*ekz;
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -535,6 +546,7 @@ void PPPMOMP::fieldforce_ad()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
     FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
@@ -590,6 +602,7 @@ void PPPMOMP::fieldforce_ad()
       sf *= 2.0*qi;
       if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -626,6 +639,7 @@ void PPPMOMP::fieldforce_peratom()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     for (i = ifrom; i < ito; ++i) {
@@ -672,6 +686,7 @@ void PPPMOMP::fieldforce_peratom()
         vatom[i][5] += qi*v5;
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp
index 9e6efb3512..b2e344036e 100644
--- a/src/USER-OMP/pppm_tip4p_omp.cpp
+++ b/src/USER-OMP/pppm_tip4p_omp.cpp
@@ -136,6 +136,8 @@ void PPPMTIP4POMP::compute_gf_ik()
     int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
       m = n / (numl*numk);
@@ -188,6 +190,7 @@ void PPPMTIP4POMP::compute_gf_ik()
         greensfn[n] = numerator*sum1/denominator;
       } else greensfn[n] = 0.0;
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -224,6 +227,8 @@ void PPPMTIP4POMP::compute_gf_ad()
     int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
 
     loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
 
     for (n = nfrom; n < nto; ++n) {
 
@@ -277,8 +282,9 @@ void PPPMTIP4POMP::compute_gf_ad()
         sf5 += sf_precoeff6[n]*greensfn[n];
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of paralle region
-  
+
   // compute the coefficients for the self-force correction
 
   double prex, prey, prez, tmp[6];
@@ -319,6 +325,7 @@ void PPPMTIP4POMP::compute(int eflag, int vflag)
     const int tid = 0;
 #endif
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
@@ -428,6 +435,7 @@ void PPPMTIP4POMP::make_rho()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
     // loop over my charges, add their contribution to nearby grid points
@@ -441,7 +449,7 @@ void PPPMTIP4POMP::make_rho()
       const int ny = p2g[i].b;
       const int nz = p2g[i].t;
 
-      // pre-screen whether this atom will ever come within 
+      // pre-screen whether this atom will ever come within
       // reach of the data segement this thread is updating.
       if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
            || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
@@ -479,6 +487,7 @@ void PPPMTIP4POMP::make_rho()
         }
       }
     }
+    thr->timer(Timer::KSPACE);
   }
 }
 
@@ -523,6 +532,7 @@ void PPPMTIP4POMP::fieldforce_ik()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
 
@@ -583,6 +593,7 @@ void PPPMTIP4POMP::fieldforce_ik()
         if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
@@ -633,6 +644,7 @@ void PPPMTIP4POMP::fieldforce_ad()
 
     // get per thread data
     ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
     dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
     FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
     FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
@@ -711,6 +723,7 @@ void PPPMTIP4POMP::fieldforce_ad()
         if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
       }
     }
+    thr->timer(Timer::KSPACE);
   } // end of parallel region
 }
 
diff --git a/src/USER-OMP/respa_omp.cpp b/src/USER-OMP/respa_omp.cpp
index b044e94ace..ed08f019fb 100644
--- a/src/USER-OMP/respa_omp.cpp
+++ b/src/USER-OMP/respa_omp.cpp
@@ -46,7 +46,7 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-RespaOMP::RespaOMP(LAMMPS *lmp, int narg, char **arg) 
+RespaOMP::RespaOMP(LAMMPS *lmp, int narg, char **arg)
   : Respa(lmp, narg, arg),ThrOMP(lmp, THR_INTGR)
 {
 }
@@ -69,7 +69,12 @@ void RespaOMP::init()
 
 void RespaOMP::setup()
 {
-  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
+  if (comm->me == 0 && screen) {
+    fprintf(screen,"Setting up r-RESPA/omp run ...\n");
+    fprintf(screen,"  Unit style    : %s\n", update->unit_style);
+    fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n", update->ntimestep);
+    fprintf(screen,"  OuterTime step: %g\n", update->dt);
+  }
 
   update->setupflag = 1;
 
@@ -101,14 +106,11 @@ void RespaOMP::setup()
   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     force_clear(newton[ilevel]);
     modify->setup_pre_force_respa(vflag,ilevel);
-    if (level_bond == ilevel && force->bond)
-      force->bond->compute(eflag,vflag);
-    if (level_angle == ilevel && force->angle)
-      force->angle->compute(eflag,vflag);
-    if (level_dihedral == ilevel && force->dihedral)
-      force->dihedral->compute(eflag,vflag);
-    if (level_improper == ilevel && force->improper)
-      force->improper->compute(eflag,vflag);
+
+    if (nhybrid_styles > 0) {
+      set_compute_flags(ilevel);
+      force->pair->compute(eflag,vflag);
+    }
     if (level_pair == ilevel && pair_compute_flag)
       force->pair->compute(eflag,vflag);
     if (level_inner == ilevel && pair_compute_flag)
@@ -117,6 +119,14 @@ void RespaOMP::setup()
       force->pair->compute_middle();
     if (level_outer == ilevel && pair_compute_flag)
       force->pair->compute_outer(eflag,vflag);
+    if (level_bond == ilevel && force->bond)
+      force->bond->compute(eflag,vflag);
+    if (level_angle == ilevel && force->angle)
+      force->angle->compute(eflag,vflag);
+    if (level_dihedral == ilevel && force->dihedral)
+      force->dihedral->compute(eflag,vflag);
+    if (level_improper == ilevel && force->improper)
+      force->improper->compute(eflag,vflag);
     if (level_kspace == ilevel && force->kspace) {
       force->kspace->setup();
       if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
@@ -139,7 +149,7 @@ void RespaOMP::setup()
       }
       fix->did_reduce();
     }
-      
+
     if (newton[ilevel]) comm->reverse_comm();
     copy_f_flevel(ilevel);
   }
@@ -188,14 +198,12 @@ void RespaOMP::setup_minimal(int flag)
   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     force_clear(newton[ilevel]);
     modify->setup_pre_force_respa(vflag,ilevel);
-    if (level_bond == ilevel && force->bond)
-      force->bond->compute(eflag,vflag);
-    if (level_angle == ilevel && force->angle)
-      force->angle->compute(eflag,vflag);
-    if (level_dihedral == ilevel && force->dihedral)
-      force->dihedral->compute(eflag,vflag);
-    if (level_improper == ilevel && force->improper)
-      force->improper->compute(eflag,vflag);
+
+    if (nhybrid_styles > 0) {
+      set_compute_flags(ilevel);
+      force->pair->compute(eflag,vflag);
+    }
+
     if (level_pair == ilevel && pair_compute_flag)
       force->pair->compute(eflag,vflag);
     if (level_inner == ilevel && pair_compute_flag)
@@ -204,6 +212,14 @@ void RespaOMP::setup_minimal(int flag)
       force->pair->compute_middle();
     if (level_outer == ilevel && pair_compute_flag)
       force->pair->compute_outer(eflag,vflag);
+    if (level_bond == ilevel && force->bond)
+      force->bond->compute(eflag,vflag);
+    if (level_angle == ilevel && force->angle)
+      force->angle->compute(eflag,vflag);
+    if (level_dihedral == ilevel && force->dihedral)
+      force->dihedral->compute(eflag,vflag);
+    if (level_improper == ilevel && force->improper)
+      force->improper->compute(eflag,vflag);
     if (level_kspace == ilevel && force->kspace) {
       force->kspace->setup();
       if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
@@ -244,9 +260,11 @@ void RespaOMP::recurse(int ilevel)
 
   for (int iloop = 0; iloop < loop[ilevel]; iloop++) {
 
+    timer->stamp();
     modify->initial_integrate_respa(vflag,ilevel,iloop);
     if (modify->n_post_integrate_respa)
       modify->post_integrate_respa(ilevel,iloop);
+    timer->stamp(Timer::MODIFY);
 
     // at outermost level, check on rebuilding neighbor list
     // at innermost level, communicate
@@ -255,7 +273,11 @@ void RespaOMP::recurse(int ilevel)
     if (ilevel == nlevels-1) {
       int nflag = neighbor->decide();
       if (nflag) {
-        if (modify->n_pre_exchange) modify->pre_exchange();
+        if (modify->n_pre_exchange) {
+          timer->stamp();
+          modify->pre_exchange();
+          timer->stamp(Timer::MODIFY);
+        }
         if (triclinic) domain->x2lamda(atom->nlocal);
         domain->pbc();
         if (domain->box_change) {
@@ -265,20 +287,27 @@ void RespaOMP::recurse(int ilevel)
         }
         timer->stamp();
         comm->exchange();
-        if (atom->sortfreq > 0 && 
+        if (atom->sortfreq > 0 &&
             update->ntimestep >= atom->nextsort) atom->sort();
         comm->borders();
         if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-        timer->stamp(TIME_COMM);
-        if (modify->n_pre_neighbor) modify->pre_neighbor();
+        timer->stamp(Timer::COMM);
+        if (modify->n_pre_neighbor) {
+          modify->pre_neighbor();
+          timer->stamp(Timer::MODIFY);
+        }
         neighbor->build();
-        timer->stamp(TIME_NEIGHBOR);
+        timer->stamp(Timer::NEIGH);
+      } else if (ilevel == 0) {
+        timer->stamp();
+        comm->forward_comm();
+        timer->stamp(Timer::COMM);
       }
 
     } else if (ilevel == 0) {
       timer->stamp();
       comm->forward_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
 
     // rRESPA recursion thru all levels
@@ -295,45 +324,53 @@ void RespaOMP::recurse(int ilevel)
     // when potentials are invoked at same level
 
     force_clear(newton[ilevel]);
-    if (modify->n_pre_force_respa)
+    if (modify->n_pre_force_respa) {
+      timer->stamp();
       modify->pre_force_respa(vflag,ilevel,iloop);
+      timer->stamp(Timer::MODIFY);
+    }
 
     timer->stamp();
-    if (level_bond == ilevel && force->bond) {
-      force->bond->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
-    }
-    if (level_angle == ilevel && force->angle) {
-      force->angle->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
-    }
-    if (level_dihedral == ilevel && force->dihedral) {
-      force->dihedral->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
-    }
-    if (level_improper == ilevel && force->improper) {
-      force->improper->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+    if (nhybrid_styles > 0) {
+      set_compute_flags(ilevel);
+      force->pair->compute(eflag,vflag);
+      timer->stamp(Timer::PAIR);
     }
     if (level_pair == ilevel && pair_compute_flag) {
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_inner == ilevel && pair_compute_flag) {
       force->pair->compute_inner();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_middle == ilevel && pair_compute_flag) {
       force->pair->compute_middle();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_outer == ilevel && pair_compute_flag) {
       force->pair->compute_outer(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
+    }
+    if (level_bond == ilevel && force->bond) {
+      force->bond->compute(eflag,vflag);
+      timer->stamp(Timer::BOND);
+    }
+    if (level_angle == ilevel && force->angle) {
+      force->angle->compute(eflag,vflag);
+      timer->stamp(Timer::BOND);
+    }
+    if (level_dihedral == ilevel && force->dihedral) {
+      force->dihedral->compute(eflag,vflag);
+      timer->stamp(Timer::BOND);
+    }
+    if (level_improper == ilevel && force->improper) {
+      force->improper->compute(eflag,vflag);
+      timer->stamp(Timer::BOND);
     }
     if (level_kspace == ilevel && kspace_compute_flag) {
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     // reduce forces from per-thread arrays, if needed
@@ -356,14 +393,14 @@ void RespaOMP::recurse(int ilevel)
 
     if (newton[ilevel]) {
       comm->reverse_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
-
+    timer->stamp();
     if (modify->n_post_force_respa)
       modify->post_force_respa(vflag,ilevel,iloop);
     modify->final_integrate_respa(ilevel,iloop);
+    timer->stamp(Timer::MODIFY);
   }
 
   copy_f_flevel(ilevel);
 }
-
diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp
index 598fb85289..de09dadc58 100644
--- a/src/USER-OMP/thr_data.cpp
+++ b/src/USER-OMP/thr_data.cpp
@@ -22,16 +22,17 @@
 #include <stdio.h>
 
 #include "memory.h"
+#include "timer.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-ThrData::ThrData(int tid)
+ThrData::ThrData(int tid, Timer *t)  // t: timer for this thread; may be NULL (timer() and get_time() check for that)
   : _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0),
-    _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid)
+    _D_values(0),_rho(0),_fp(0),_rho1d(0),_drho1d(0),_tid(tid), _timer(t)
 {
-  // nothing else to do here.
+  _timer_active = 0;  // no stamps are forwarded until _stamp() sees Timer::START
 }
 
 
@@ -45,6 +46,30 @@ void ThrData::check_tid(int tid)
 
 /* ---------------------------------------------------------------------- */
 
+void ThrData::_stamp(enum Timer::ttype flag)  // forward a time stamp for category 'flag' to this thread's timer
+{
+  // do nothing until it gets set to 0 in ::setup()
+  if (_timer_active < 0) return;
+
+  if (flag == Timer::START) {  // a START stamp switches stamping on
+    _timer_active = 1;
+  }
+
+  if (_timer_active) _timer->stamp(flag);  // _timer is not NULL-checked here; the inline timer() wrapper does that
+}
+
+/* ---------------------------------------------------------------------- */
+
+double ThrData::get_time(enum Timer::ttype flag)  // value of get_wall(flag) from this thread's timer
+{
+  if (_timer)
+    return _timer->get_wall(flag);
+  else
+    return 0.0;  // no per-thread timer attached
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ThrData::init_force(int nall, double **f, double **torque,
                          double *erforce, double *de, double *drho)
 {
@@ -59,32 +84,29 @@ void ThrData::init_force(int nall, double **f, double **torque,
   eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL;
   vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL;
 
-  _f = f + _tid*nall;
-  if (nall > 0)
+  if (nall > 0 && f) {
+    _f = f + _tid*nall;
     memset(&(_f[0][0]),0,nall*3*sizeof(double));
+  } else _f = NULL;
 
-  if (torque) {
+  if (nall > 0 && torque) {
     _torque = torque + _tid*nall;
-    if (nall > 0)
-      memset(&(_torque[0][0]),0,nall*3*sizeof(double));
+    memset(&(_torque[0][0]),0,nall*3*sizeof(double));
   } else _torque = NULL;
 
-  if (erforce) {
+  if (nall > 0 && erforce) {
     _erforce = erforce + _tid*nall;
-    if (nall > 0)
-      memset(&(_erforce[0]),0,nall*sizeof(double));
+    memset(&(_erforce[0]),0,nall*sizeof(double));
   } else _erforce = NULL;
 
-  if (de) {
+  if (nall > 0 && de) {
     _de = de + _tid*nall;
-    if (nall > 0)
-      memset(&(_de[0]),0,nall*sizeof(double));
+    memset(&(_de[0]),0,nall*sizeof(double));
   } else _de = NULL;
 
-  if (drho) {
+  if (nall > 0 && drho) {
     _drho = drho + _tid*nall;
-    if (nall > 0)
-      memset(&(_drho[0]),0,nall*sizeof(double));
+    memset(&(_drho[0]),0,nall*sizeof(double));
   } else _drho = NULL;
 }
 
diff --git a/src/USER-OMP/thr_data.h b/src/USER-OMP/thr_data.h
index bfcb110794..3f1d866a80 100644
--- a/src/USER-OMP/thr_data.h
+++ b/src/USER-OMP/thr_data.h
@@ -22,6 +22,8 @@
 #include <omp.h>
 #endif
 
+#include "timer.h"
+
 namespace LAMMPS_NS {
 
 // per thread data accumulators
@@ -32,12 +34,17 @@ class ThrData {
   friend class ThrOMP;
 
  public:
-  ThrData(int tid);
+  ThrData(int tid, class Timer *t);
   ~ThrData() {};
 
   void check_tid(int);    // thread id consistency check
   int get_tid() const { return _tid; }; // our thread id.
 
+  // inline wrapper, to make this more efficient
+  // when per-thread timers are off
+  void timer(enum Timer::ttype flag) { if (_timer) _stamp(flag); };
+  double get_time(enum Timer::ttype flag);
+
   // erase accumulator contents and hook up force arrays
   void init_force(int, double **, double **, double *, double *, double *);
 
@@ -118,6 +125,12 @@ class ThrData {
   void *_drho1d_6;
   // my thread id
   const int _tid;
+  // timer info
+  int _timer_active;
+  class Timer *_timer;
+
+ private:
+  void _stamp(enum Timer::ttype flag);
 
  public:
   // compute global per thread virial contribution from global forces and positions
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index 4aea630554..4462f70bf1 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -23,6 +23,7 @@
 #include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
+#include "timer.h"
 
 #include "thr_omp.h"
 
@@ -183,7 +184,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
           // pair_style hybrid will compute fdotr for us
           // but we first need to reduce the forces
           data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
-	  fix->did_reduce();
+          fix->did_reduce();
           need_force_reduce = 0;
         }
       }
@@ -402,6 +403,7 @@ void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
     if (lmp->atom->torque)
       data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
   }
+  thr->timer(Timer::COMM);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/finish.cpp b/src/finish.cpp
index ca0a883993..b145809282 100644
--- a/src/finish.cpp
+++ b/src/finish.cpp
@@ -16,9 +16,10 @@
 #include "string.h"
 #include "stdio.h"
 #include "finish.h"
-#include "lammps.h"
+#include "timer.h"
 #include "universe.h"
 #include "accelerator_kokkos.h"
+#include "accelerator_omp.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
@@ -30,12 +31,28 @@
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
-#include "timer.h"
 #include "output.h"
 #include "memory.h"
 
+#ifdef LMP_USER_OMP
+#include "modify.h"
+#include "fix_omp.h"
+#include "thr_data.h"
+#endif
+
 using namespace LAMMPS_NS;
 
+// local function prototypes, code at end of file
+
+static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
+                        MPI_Comm world, const int nprocs, const int nthreads,
+                        const int me, double time_loop, FILE *scr, FILE *log);
+
+#ifdef LMP_USER_OMP
+static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
+                      const int nthreads,FILE *scr, FILE *log);
+#endif
+
 /* ---------------------------------------------------------------------- */
 
 Finish::Finish(LAMMPS *lmp) : Pointers(lmp) {}
@@ -46,14 +63,16 @@ void Finish::end(int flag)
 {
   int i,m,nneigh,nneighfull;
   int histo[10];
-  int loopflag,minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
+  int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
   double time,tmp,ave,max,min;
-  double time_loop,time_other;
+  double time_loop,time_other,cpu_loop;
 
   int me,nprocs;
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
+  const int nthreads = comm->nthreads;
+
   // recompute natoms in case atoms have been lost
 
   bigint nblocal = atom->nlocal;
@@ -67,8 +86,8 @@ void Finish::end(int flag)
   // flag = 3 = TAD
   // turn off neighflag for Kspace partition of verlet/split integrator
 
-  loopflag = 1;
   minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
+  time_loop = cpu_loop = time_other = 0.0;
 
   if (flag == 1) {
     if (update->whichflag == 2) minflag = 1;
@@ -80,53 +99,88 @@ void Finish::end(int flag)
     if (force->kspace && force->kspace_match("pppm",0)
         && force->kspace->fftbench) fftflag = 1;
   }
-  if (flag == 2) prdflag = histoflag = neighflag = 1;
+  if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
   if (flag == 3) tadflag = histoflag = neighflag = 1;
 
   // loop stats
 
-  if (loopflag) {
-    time_other = timer->array[TIME_LOOP] -
-      (timer->array[TIME_PAIR] + timer->array[TIME_BOND] +
-       timer->array[TIME_KSPACE] + timer->array[TIME_NEIGHBOR] +
-       timer->array[TIME_COMM] + timer->array[TIME_OUTPUT]);
+  if (timer->has_loop()) {
+    
+    // overall loop time
 
-    time_loop = timer->array[TIME_LOOP];
+    time_loop = timer->get_wall(Timer::TOTAL);
+    cpu_loop = timer->get_cpu(Timer::TOTAL);
     MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time_loop = tmp/nprocs;
+    MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    cpu_loop = tmp/nprocs;
+    if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;
+ 
+    if (me == 0) {
+      int ntasks = nprocs * nthreads;
+      const char fmt1[] = "Loop time of %g on %d procs "
+        "for %d steps with " BIGINT_FORMAT " atoms\n\n";
+      if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
+                          atom->natoms,cpu_loop);
+      if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
+                           atom->natoms,cpu_loop);
+
+      // Gromacs/NAMD-style performance metric for suitable unit settings
+
+      if ( timeflag && !minflag && !prdflag && !tadflag &&
+           (update->nsteps > 0) && (update->dt != 0.0) &&
+           ((strcmp(update->unit_style,"lj") == 0) ||
+            (strcmp(update->unit_style,"metal") == 0) ||
+            (strcmp(update->unit_style,"micro") == 0) ||
+            (strcmp(update->unit_style,"nano") == 0) ||
+            (strcmp(update->unit_style,"electron") == 0) ||
+            (strcmp(update->unit_style,"real") == 0)) ) {
+        double one_fs = force->femtosecond;
+        double t_step = ((double) time_loop) / ((double) update->nsteps);
+        double step_t = 1.0/t_step;
+
+        if (strcmp(update->unit_style,"lj") == 0) {
+          double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
+          const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
+          if (screen) fprintf(screen,perf,tau_day,step_t);
+          if (logfile) fprintf(logfile,perf,tau_day,step_t);
+        } else {
+          double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
+          double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
+          const char perf[] = 
+            "Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
+          if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
+          if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
+        }
+      }
 
-    // overall loop time
+      // CPU use on MPI tasks and OpenMP threads
 
-#if defined(_OPENMP)
-    if (me == 0) {
-      int ntasks = nprocs * comm->nthreads;
-      if (screen) fprintf(screen,
-                          "Loop time of %g on %d procs (%d MPI x %d OpenMP) "
-                          "for %d steps with " BIGINT_FORMAT " atoms\n",
-                          time_loop,ntasks,nprocs,comm->nthreads,
-                          update->nsteps,atom->natoms);
-      if (logfile) fprintf(logfile,
-                          "Loop time of %g on %d procs (%d MPI x %d OpenMP) "
-                          "for %d steps with " BIGINT_FORMAT " atoms\n",
-                          time_loop,ntasks,nprocs,comm->nthreads,
-                          update->nsteps,atom->natoms);
-    }
+#ifdef LMP_USER_OMP
+      const char fmt2[] = 
+        "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
+      if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
+      if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
 #else
-    if (me == 0) {
-      if (screen) fprintf(screen,
-                          "Loop time of %g on %d procs for %d steps with "
-                          BIGINT_FORMAT " atoms\n",
-                          time_loop,nprocs,update->nsteps,atom->natoms);
-      if (logfile) fprintf(logfile,
-                           "Loop time of %g on %d procs for %d steps with "
-                           BIGINT_FORMAT " atoms\n",
-                           time_loop,nprocs,update->nsteps,atom->natoms);
-    }
+      const char fmt2[] =
+        "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
+      if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
+      if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
 #endif
 
-    if (time_loop == 0.0) time_loop = 1.0;
+    }
   }
 
+  // avoid division by zero for very short runs
+
+  if (time_loop == 0.0) time_loop = 1.0;
+  if (cpu_loop == 0.0) cpu_loop = 100.0;
+
+  // get "Other" wall time for later use
+
+  if (timer->has_normal())
+    time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);
+   
   // minimization stats
 
   if (minflag) {
@@ -190,7 +244,7 @@ void Finish::end(int flag)
     if (screen) fprintf(screen,"PRD stats:\n");
     if (logfile) fprintf(logfile,"PRD stats:\n");
 
-    time = timer->array[TIME_PAIR];
+    time = timer->get_wall(Timer::DEPHASE);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -202,7 +256,7 @@ void Finish::end(int flag)
                 time,time/time_loop*100.0);
     }
 
-    time = timer->array[TIME_BOND];
+    time = timer->get_wall(Timer::DYNAMICS);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -214,7 +268,7 @@ void Finish::end(int flag)
                 time,time/time_loop*100.0);
     }
 
-    time = timer->array[TIME_KSPACE];
+    time = timer->get_wall(Timer::QUENCH);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -226,10 +280,35 @@ void Finish::end(int flag)
                 time,time/time_loop*100.0);
     }
 
-    time = time_other;
+      time = timer->get_wall(Timer::REPCOMM);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
+      if (screen)
+        fprintf(screen,"  Comm     time (%%) = %g (%g)\n",
+                time,time/time_loop*100.0);
+      if (logfile)
+        fprintf(logfile,"  Comm     time (%%) = %g (%g)\n",
+                time,time/time_loop*100.0);
+    }
+
+
+    time = timer->get_wall(Timer::REPOUT);
+    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    time = tmp/nprocs;
+    if (me == 0) {
+      if (screen)
+        fprintf(screen,"  Output   time (%%) = %g (%g)\n",
+                time,time/time_loop*100.0);
+      if (logfile)
+        fprintf(logfile,"  Output   time (%%) = %g (%g)\n",
+                time,time/time_loop*100.0);
+    }
+
+    time = time_other;
+    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    time = tmp/nprocs;
+    if (me == 0) { // XXXX: replica comm, replica output
       if (screen)
         fprintf(screen,"  Other    time (%%) = %g (%g)\n",
                 time,time/time_loop*100.0);
@@ -250,7 +329,7 @@ void Finish::end(int flag)
     if (screen) fprintf(screen,"TAD stats:\n");
     if (logfile) fprintf(logfile,"TAD stats:\n");
 
-    time = timer->array[TIME_PAIR];
+    time = timer->get_wall(Timer::NEB);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -262,7 +341,7 @@ void Finish::end(int flag)
                 time,time/time_loop*100.0);
     }
 
-    time = timer->array[TIME_BOND];
+    time = timer->get_wall(Timer::DYNAMICS);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -274,7 +353,7 @@ void Finish::end(int flag)
                 time,time/time_loop*100.0);
     }
 
-    time = timer->array[TIME_KSPACE];
+    time = timer->get_wall(Timer::QUENCH);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -287,7 +366,7 @@ void Finish::end(int flag)
     }
 
 
-    time = timer->array[TIME_COMM];
+    time = timer->get_wall(Timer::REPCOMM);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -300,7 +379,7 @@ void Finish::end(int flag)
     }
 
 
-    time = timer->array[TIME_OUTPUT];
+    time = timer->get_wall(Timer::REPOUT);
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
     if (me == 0) {
@@ -325,102 +404,109 @@ void Finish::end(int flag)
     }
   }
 
-  // timing breakdowns
-
-  if (timeflag) {
-    if (me == 0) {
-      if (screen) fprintf(screen,"\n");
-      if (logfile) fprintf(logfile,"\n");
-    }
-
-    time = timer->array[TIME_PAIR];
-    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    time = tmp/nprocs;
-    if (me == 0) {
-      if (screen)
-        fprintf(screen,"Pair  time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-      if (logfile)
-        fprintf(logfile,"Pair  time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-    }
+  if (timeflag && timer->has_normal()) {
 
-    if (atom->molecular) {
-      time = timer->array[TIME_BOND];
-      MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-      time = tmp/nprocs;
+    if (timer->has_full()) {
+      const char hdr[] = "\nMPI task timing breakdown:\n"
+        "Section |  min time  |  avg time  |  max time  |%varavg|  %CPU | %total\n"
+        "-----------------------------------------------------------------------\n";
       if (me == 0) {
-        if (screen)
-          fprintf(screen,"Bond  time (%%) = %g (%g)\n",
-                  time,time/time_loop*100.0);
-        if (logfile)
-          fprintf(logfile,"Bond  time (%%) = %g (%g)\n",
-                  time,time/time_loop*100.0);
+        if (screen)  fputs(hdr,screen);
+        if (logfile) fputs(hdr,logfile);
       }
-    }
-
-    if (force->kspace) {
-      time = timer->array[TIME_KSPACE];
-      MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-      time = tmp/nprocs;
+    } else {
+      const char hdr[] = "\nMPI task timing breakdown:\n"
+        "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
+        "---------------------------------------------------------------\n";
       if (me == 0) {
-        if (screen)
-          fprintf(screen,"Kspce time (%%) = %g (%g)\n",
-                  time,time/time_loop*100.0);
-        if (logfile)
-          fprintf(logfile,"Kspce time (%%) = %g (%g)\n",
-                  time,time/time_loop*100.0);
+        if (screen)  fputs(hdr,screen);
+        if (logfile) fputs(hdr,logfile);
       }
     }
 
-    time = timer->array[TIME_NEIGHBOR];
-    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    time = tmp/nprocs;
-    if (me == 0) {
-      if (screen)
-        fprintf(screen,"Neigh time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-      if (logfile)
-        fprintf(logfile,"Neigh time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-    }
+    mpi_timings("Pair",timer,Timer::PAIR, world,nprocs,
+                nthreads,me,time_loop,screen,logfile);
+
+    if (atom->molecular)
+      mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
+                  nthreads,me,time_loop,screen,logfile);
+    
+    if (force->kspace)
+      mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
+                  nthreads,me,time_loop,screen,logfile);
+
+    mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
+                nthreads,me,time_loop,screen,logfile);
+    mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
+                nthreads,me,time_loop,screen,logfile);
+    mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
+                nthreads,me,time_loop,screen,logfile);
+    mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
+                nthreads,me,time_loop,screen,logfile);
+    if (timer->has_sync())
+      mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
+                  nthreads,me,time_loop,screen,logfile);
 
-    time = timer->array[TIME_COMM];
+    time = time_other;
     MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time = tmp/nprocs;
+
+    const char *fmt;
+    if (timer->has_full())
+      fmt = "Other   |            |%- 12.4g|            |       |       |%6.2f\n";
+    else
+      fmt = "Other   |            |%- 12.4g|            |       |%6.2f\n";
+
     if (me == 0) {
-      if (screen)
-        fprintf(screen,"Comm  time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-      if (logfile)
-        fprintf(logfile,"Comm  time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
+      if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
+      if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
     }
+  }
 
-    time = timer->array[TIME_OUTPUT];
-    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    time = tmp/nprocs;
-    if (me == 0) {
-      if (screen)
-        fprintf(screen,"Outpt time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-      if (logfile)
-        fprintf(logfile,"Outpt time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
+#ifdef LMP_USER_OMP
+  const char thr_hdr_fmt[] = 
+    "\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
+  const char thr_header[] =
+    "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
+    "---------------------------------------------------------------\n";
+
+  int ifix = modify->find_fix("package_omp");
+
+  // print thread breakdown only with full timer detail
+
+  if ((ifix >= 0) && timer->has_full() && me == 0) {
+    double thr_total = 0.0;
+    ThrData *td;
+    FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
+    for (i=0; i < nthreads; ++i) {
+      td = fixomp->get_thr(i);
+      thr_total += td->get_time(Timer::ALL);
     }
+    thr_total /= (double) nthreads;
 
-    time = time_other;
-    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    time = tmp/nprocs;
-    if (me == 0) {
-      if (screen)
-        fprintf(screen,"Other time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
-      if (logfile)
-        fprintf(logfile,"Other time (%%) = %g (%g)\n",
-                time,time/time_loop*100.0);
+    if (thr_total > 0.0) {
+      if (screen) {
+        fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
+        fputs(thr_header,screen);
+      }
+      if (logfile) {
+        fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
+        fputs(thr_header,logfile);
+      }
+
+      omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);
+
+      if (atom->molecular)
+        omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);
+
+      if (force->kspace)
+        omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);
+
+      omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
+      omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
     }
   }
+#endif
 
   // FFT timing statistics
   // time3d,time1d = total time during run for 3d and 1d FFTs
@@ -459,7 +545,7 @@ void Finish::end(int flag)
     MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time1d = tmp/nprocs;
 
-    double time_kspace = timer->array[TIME_KSPACE];
+    double time_kspace = timer->get_wall(Timer::KSPACE);
     MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
     time_kspace = tmp/nprocs;
 
@@ -536,8 +622,10 @@ void Finish::end(int flag)
            neighbor->old_requests[m]->gran ||
            neighbor->old_requests[m]->respaouter ||
            neighbor->old_requests[m]->half_from_full) &&
-          neighbor->old_requests[m]->skip == 0) {
-        if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
+          neighbor->old_requests[m]->skip == 0 &&
+          neighbor->lists[m] && neighbor->lists[m]->numneigh) {
+        if (!neighbor->lists[m] && lmp->kokkos &&
+            lmp->kokkos->neigh_list_kokkos(m)) break;
         else break;
       }
     }
@@ -584,13 +672,14 @@ void Finish::end(int flag)
 
     nneighfull = 0;
     if (m < neighbor->old_nrequest) {
-      if (neighbor->lists[m]) {
+      if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
         int inum = neighbor->lists[m]->inum;
         int *ilist = neighbor->lists[m]->ilist;
         int *numneigh = neighbor->lists[m]->numneigh;
         for (i = 0; i < inum; i++)
           nneighfull += numneigh[ilist[i]];
-      } else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m);
+      } else if (!neighbor->lists[m] && lmp->kokkos)
+          nneighfull = lmp->kokkos->neigh_count(m);
 
       tmp = nneighfull;
       stats(1,&tmp,&ave,&max,&min,10,histo);
@@ -622,7 +711,7 @@ void Finish::end(int flag)
     MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);
 
     int nspec;
-    double nspec_all;
+    double nspec_all = 0;
     if (atom->molecular == 1) {
       int **nspecial = atom->nspecial;
       int nlocal = atom->nlocal;
@@ -731,3 +820,95 @@ void Finish::stats(int n, double *data,
   *pmax = max;
   *pmin = min;
 }
+
+/* ---------------------------------------------------------------------- */
+
+// collective over world: reduce wall/CPU time of category tt across ranks;
+// rank 0 prints min/avg/max, %varavg, %CPU (full timers only) and %total
+static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
+                        MPI_Comm world, const int nprocs, const int nthreads,
+                        const int me, double time_loop, FILE *scr, FILE *log)
+{
+  double tmp, time_max, time_min, time_sq;
+  double time = t->get_wall(tt);
+  double time_cpu = t->get_cpu(tt);
+  if (time/time_loop < 0.001)  // insufficient timer resolution!
+    time_cpu = 1.0;
+  else
+    time_cpu = time_cpu / time;
+  if (time_cpu > nthreads) time_cpu = nthreads;
+
+  MPI_Allreduce(&time,&time_min,1,MPI_DOUBLE,MPI_MIN,world);
+  MPI_Allreduce(&time,&time_max,1,MPI_DOUBLE,MPI_MAX,world);
+  time_sq = time*time;
+  MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  time = tmp/nprocs;
+  MPI_Allreduce(&time_sq,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  time_sq = tmp/nprocs;
+  MPI_Allreduce(&time_cpu,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  time_cpu = tmp/nprocs*100.0;
+
+  // % variance from the average as measure of load imbalance
+  // round-off can make the variance term slightly negative: report 0, not NaN
+  if ((time > 1.0e-10) && (time_sq/time - time > 0.0))
+    time_sq = sqrt(time_sq/time - time)*100.0;
+  else
+    time_sq = 0.0;
+
+  if (me == 0) {
+    tmp = time/time_loop*100.0;
+    if (t->has_full()) {
+      const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f\n";
+      if (scr)
+        fprintf(scr,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
+      if (log)
+        fprintf(log,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
+    } else {
+      const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
+      if (scr)
+        fprintf(scr,fmt,label,time_min,time,time_max,time_sq,tmp);
+      if (log)
+        fprintf(log,fmt,label,time_min,time,time_max,time_sq,tmp);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef LMP_USER_OMP
+// print one row of the thread timing table for category 'which': min/avg/max
+// time over the nthreads ThrData objects of fix, %varavg, and % of Timer::ALL
+static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
+                      const int nthreads,FILE *scr, FILE *log)
+{
+  const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
+  double time_min = 1.0e100, time_max = -1.0e100;
+  double time_avg = 0.0, time_std = 0.0, time_total = 0.0;
+
+  for (int i=0; i < nthreads; ++i) {
+    ThrData *thr = fix->get_thr(i);
+    double tmp=thr->get_time(which);
+    time_min = MIN(time_min,tmp);
+    time_max = MAX(time_max,tmp);
+    time_avg += tmp;
+    time_std += tmp*tmp;
+    time_total += thr->get_time(Timer::ALL);
+  }
+
+  time_avg /= nthreads;
+  time_std /= nthreads;
+  time_total /= nthreads;
+
+  // round-off can make the variance term slightly negative: report 0, not NaN
+  if ((time_avg > 1.0e-10) && (time_std/time_avg - time_avg > 0.0))
+    time_std = sqrt(time_std/time_avg - time_avg)*100.0;
+  else
+    time_std = 0.0;
+
+  // share of the average total thread time; 0 if nothing was recorded
+  const double pct = (time_total > 0.0) ? time_avg/time_total*100.0 : 0.0;
+  if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std,pct);
+  if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std,pct);
+}
+#endif
+
diff --git a/src/input.cpp b/src/input.cpp
index 455b8dc869..da1b8a0bfc 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -43,6 +43,7 @@
 #include "update.h"
 #include "neighbor.h"
 #include "special.h"
+#include "timer.h"
 #include "variable.h"
 #include "accelerator_cuda.h"
 #include "accelerator_kokkos.h"
@@ -685,6 +686,7 @@ int Input::execute_command()
   else if (!strcmp(command,"thermo_modify")) thermo_modify();
   else if (!strcmp(command,"thermo_style")) thermo_style();
   else if (!strcmp(command,"timestep")) timestep();
+  else if (!strcmp(command,"timers")) timers();
   else if (!strcmp(command,"uncompute")) uncompute();
   else if (!strcmp(command,"undump")) undump();
   else if (!strcmp(command,"unfix")) unfix();
@@ -1037,6 +1039,7 @@ void Input::print()
     if (strcmp(arg[iarg],"file") == 0 || strcmp(arg[iarg],"append") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal print command");
       if (me == 0) {
+        if (fp != NULL) fclose(fp);
         if (strcmp(arg[iarg],"file") == 0) fp = fopen(arg[iarg+1],"w");
         else fp = fopen(arg[iarg+1],"a");
         if (fp == NULL) {
@@ -1741,6 +1744,13 @@ void Input::thermo_style()
 
 /* ---------------------------------------------------------------------- */
 
+void Input::timers()
+{
+  timer->modify_params(narg,arg);  // pass the "timers" command args to timer
+}
+
+/* ---------------------------------------------------------------------- */
+
 void Input::timestep()
 {
   if (narg != 1) error->all(FLERR,"Illegal timestep command");
diff --git a/src/input.h b/src/input.h
index 5863e0cefe..6637d97373 100644
--- a/src/input.h
+++ b/src/input.h
@@ -62,7 +62,7 @@ class Input : protected Pointers {
   void reallocate(char *&, int &, int);  // reallocate a char string
   int execute_command();                 // execute a single command
 
-  void clear();                // input script commands
+  void clear();                 // input script commands
   void echo();
   void ifthenelse();
   void include();
@@ -77,7 +77,7 @@ class Input : protected Pointers {
   void shell();
   void variable_command();
 
-  void angle_coeff();          // LAMMPS commands
+  void angle_coeff();           // LAMMPS commands
   void angle_style();
   void atom_modify();
   void atom_style();
@@ -126,6 +126,7 @@ class Input : protected Pointers {
   void thermo_modify();
   void thermo_style();
   void timestep();
+  void timers();
   void uncompute();
   void undump();
   void unfix();
diff --git a/src/min.cpp b/src/min.cpp
index 12afac4037..b06ccfc638 100644
--- a/src/min.cpp
+++ b/src/min.cpp
@@ -459,9 +459,13 @@ double Min::energy_force(int resetflag)
   if (nflag == 0) {
     timer->stamp();
     comm->forward_comm();
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
   } else {
-    if (modify->n_min_pre_exchange) modify->min_pre_exchange();
+    if (modify->n_min_pre_exchange) {
+      timer->stamp();
+      modify->min_pre_exchange();
+      timer->stamp(Timer::MODIFY);
+    }
     if (triclinic) domain->x2lamda(atom->nlocal);
     domain->pbc();
     if (domain->box_change) {
@@ -475,20 +479,24 @@ double Min::energy_force(int resetflag)
         update->ntimestep >= atom->nextsort) atom->sort();
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
     neighbor->build();
-    timer->stamp(TIME_NEIGHBOR);
+    timer->stamp(Timer::NEIGH);
   }
 
   ev_set(update->ntimestep);
   force_clear();
-  if (modify->n_min_pre_force) modify->min_pre_force(vflag);
 
   timer->stamp();
 
+  if (modify->n_min_pre_force) {
+    modify->min_pre_force(vflag);
+    timer->stamp(Timer::MODIFY);
+  }
+
   if (pair_compute_flag) {
     force->pair->compute(eflag,vflag);
-    timer->stamp(TIME_PAIR);
+    timer->stamp(Timer::PAIR);
   }
 
   if (atom->molecular) {
@@ -496,17 +504,17 @@ double Min::energy_force(int resetflag)
     if (force->angle) force->angle->compute(eflag,vflag);
     if (force->dihedral) force->dihedral->compute(eflag,vflag);
     if (force->improper) force->improper->compute(eflag,vflag);
-    timer->stamp(TIME_BOND);
+    timer->stamp(Timer::BOND);
   }
 
   if (kspace_compute_flag) {
     force->kspace->compute(eflag,vflag);
-    timer->stamp(TIME_KSPACE);
+    timer->stamp(Timer::KSPACE);
   }
 
   if (force->newton) {
     comm->reverse_comm();
-    timer->stamp(TIME_COMM);
+    timer->stamp(Timer::COMM);
   }
 
   // update per-atom minimization variables stored by pair styles
@@ -517,7 +525,11 @@ double Min::energy_force(int resetflag)
 
   // fixes that affect minimization
 
-  if (modify->n_min_post_force) modify->min_post_force(vflag);
+  if (modify->n_min_post_force) {
+     timer->stamp();
+     modify->min_post_force(vflag);
+     timer->stamp(Timer::MODIFY);
+  }
 
   // compute potential energy of system
   // normalize if thermo PE does
diff --git a/src/min_cg.cpp b/src/min_cg.cpp
index f88426f587..4953370562 100644
--- a/src/min_cg.cpp
+++ b/src/min_cg.cpp
@@ -175,7 +175,7 @@ int MinCG::iterate(int maxiter)
     if (output->next == ntimestep) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 
diff --git a/src/min_fire.cpp b/src/min_fire.cpp
index 6ef26e7eaa..8d0debf349 100644
--- a/src/min_fire.cpp
+++ b/src/min_fire.cpp
@@ -266,7 +266,7 @@ int MinFire::iterate(int maxiter)
     if (output->next == ntimestep) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 
diff --git a/src/min_hftn.cpp b/src/min_hftn.cpp
index cb47934380..cdd9bcda3b 100644
--- a/src/min_hftn.cpp
+++ b/src/min_hftn.cpp
@@ -535,7 +535,7 @@ int MinHFTN::execute_hftn_(const bool      bPrintProgress,
       }
       timer->stamp();
       output->write (update->ntimestep);
-      timer->stamp (TIME_OUTPUT);
+      timer->stamp (Timer::OUTPUT);
     }
 
     //---- RETURN IF NUMBER OF EVALUATIONS EXCEEDED.
diff --git a/src/min_quickmin.cpp b/src/min_quickmin.cpp
index 7de5dc6c99..124b5bf575 100644
--- a/src/min_quickmin.cpp
+++ b/src/min_quickmin.cpp
@@ -232,7 +232,7 @@ int MinQuickMin::iterate(int maxiter)
     if (output->next == ntimestep) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 
diff --git a/src/min_sd.cpp b/src/min_sd.cpp
index 80cad3e135..44936ce32a 100644
--- a/src/min_sd.cpp
+++ b/src/min_sd.cpp
@@ -100,7 +100,7 @@ int MinSD::iterate(int maxiter)
     if (output->next == ntimestep) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 
diff --git a/src/minimize.cpp b/src/minimize.cpp
index 44a037c49f..b927ee9270 100644
--- a/src/minimize.cpp
+++ b/src/minimize.cpp
@@ -54,9 +54,9 @@ void Minimize::command(int narg, char **arg)
   update->minimize->setup();
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
   update->minimize->run(update->nsteps);
-  timer->barrier_stop(TIME_LOOP);
+  timer->barrier_stop();
 
   update->minimize->cleanup();
 
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index 34d8d692eb..493defc7e1 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -814,7 +814,8 @@ void Neighbor::init()
         fprintf(logfile,"  ghost atom cutoff = %g\n",cutghost);
         if (style != NSQ)
           fprintf(logfile,"  binsize = %g -> bins = %g %g %g\n",binsize,
-	          ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), ceil(bbox[2]/binsize));
+	          ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), 
+                  ceil(bbox[2]/binsize));
       }
       if (screen) {
         fprintf(screen,"Neighbor list info ...\n");
@@ -825,7 +826,8 @@ void Neighbor::init()
         fprintf(screen,"  ghost atom cutoff = %g\n",cutghost);
         if (style != NSQ)
           fprintf(screen,"  binsize = %g, bins = %g %g %g\n",binsize,
-	          ceil(bbox[0]/binsize), ceil(bbox[1]/binsize), ceil(bbox[2]/binsize));
+	          ceil(bbox[0]/binsize), ceil(bbox[1]/binsize),
+                  ceil(bbox[2]/binsize));
       }
     }
   }
diff --git a/src/rerun.cpp b/src/rerun.cpp
index 08e7ccf42a..bb98f23dfa 100644
--- a/src/rerun.cpp
+++ b/src/rerun.cpp
@@ -142,7 +142,7 @@ void Rerun::command(int narg, char **arg)
   lmp->init();
 
   timer->init();
-  timer->barrier_start(TIME_LOOP);
+  timer->barrier_start();
 
   bigint ntimestep = rd->seek(first,0);
   if (ntimestep < 0)
@@ -172,7 +172,7 @@ void Rerun::command(int narg, char **arg)
   output->next_thermo = update->ntimestep;
   output->write(update->ntimestep);
 
-  timer->barrier_stop(TIME_LOOP);
+  timer->barrier_stop();
 
   update->integrate->cleanup();
 
diff --git a/src/respa.cpp b/src/respa.cpp
index 3af1dafcfb..632d2a109f 100644
--- a/src/respa.cpp
+++ b/src/respa.cpp
@@ -452,6 +452,7 @@ void Respa::setup()
       force->kspace->setup();
       if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
     }
+
     if (newton[ilevel]) comm->reverse_comm();
     copy_f_flevel(ilevel);
   }
@@ -554,12 +555,16 @@ void Respa::run(int n)
 
     sum_flevel_f();
 
-    if (modify->n_end_of_step) modify->end_of_step();
+    if (modify->n_end_of_step) {
+      timer->stamp();
+      modify->end_of_step();
+      timer->stamp(Timer::MODIFY);
+    }
 
     if (ntimestep == output->next) {
       timer->stamp();
       output->write(update->ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 }
@@ -593,9 +598,11 @@ void Respa::recurse(int ilevel)
 
   for (int iloop = 0; iloop < loop[ilevel]; iloop++) {
 
+    timer->stamp();
     modify->initial_integrate_respa(vflag,ilevel,iloop);
     if (modify->n_post_integrate_respa)
       modify->post_integrate_respa(ilevel,iloop);
+    timer->stamp(Timer::MODIFY);
 
     // at outermost level, check on rebuilding neighbor list
     // at innermost level, communicate
@@ -604,7 +611,11 @@ void Respa::recurse(int ilevel)
     if (ilevel == nlevels-1) {
       int nflag = neighbor->decide();
       if (nflag) {
-        if (modify->n_pre_exchange) modify->pre_exchange();
+        if (modify->n_pre_exchange) {
+          timer->stamp();
+          modify->pre_exchange();
+          timer->stamp(Timer::MODIFY);
+        }
         if (triclinic) domain->x2lamda(atom->nlocal);
         domain->pbc();
         if (domain->box_change) {
@@ -614,24 +625,27 @@ void Respa::recurse(int ilevel)
         }
         timer->stamp();
         comm->exchange();
-        if (atom->sortfreq > 0 &&
+        if (atom->sortfreq > 0 && 
             update->ntimestep >= atom->nextsort) atom->sort();
         comm->borders();
         if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-        timer->stamp(TIME_COMM);
-        if (modify->n_pre_neighbor) modify->pre_neighbor();
+        timer->stamp(Timer::COMM);
+        if (modify->n_pre_neighbor) {
+          modify->pre_neighbor();
+          timer->stamp(Timer::MODIFY);
+        }
         neighbor->build();
-        timer->stamp(TIME_NEIGHBOR);
+        timer->stamp(Timer::NEIGH);
       } else if (ilevel == 0) {
         timer->stamp();
         comm->forward_comm();
-        timer->stamp(TIME_COMM);
+        timer->stamp(Timer::COMM);
       }
 
     } else if (ilevel == 0) {
       timer->stamp();
       comm->forward_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
 
     // rRESPA recursion thru all levels
@@ -648,60 +662,64 @@ void Respa::recurse(int ilevel)
     // when potentials are invoked at same level
 
     force_clear(newton[ilevel]);
-    if (modify->n_pre_force_respa)
+    if (modify->n_pre_force_respa) {
+      timer->stamp();
       modify->pre_force_respa(vflag,ilevel,iloop);
+      timer->stamp(Timer::MODIFY);
+    }
 
     timer->stamp();
     if (nhybrid_styles > 0) {
       set_compute_flags(ilevel);
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_pair == ilevel && pair_compute_flag) {
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_inner == ilevel && pair_compute_flag) {
       force->pair->compute_inner();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_middle == ilevel && pair_compute_flag) {
       force->pair->compute_middle();
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_outer == ilevel && pair_compute_flag) {
       force->pair->compute_outer(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
     if (level_bond == ilevel && force->bond) {
       force->bond->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
     if (level_angle == ilevel && force->angle) {
       force->angle->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
     if (level_dihedral == ilevel && force->dihedral) {
       force->dihedral->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
     if (level_improper == ilevel && force->improper) {
       force->improper->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
     if (level_kspace == ilevel && kspace_compute_flag) {
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     if (newton[ilevel]) {
       comm->reverse_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
-
+    timer->stamp();
     if (modify->n_post_force_respa)
       modify->post_force_respa(vflag,ilevel,iloop);
     modify->final_integrate_respa(ilevel,iloop);
+    timer->stamp(Timer::MODIFY);
   }
 
   copy_f_flevel(ilevel);
diff --git a/src/run.cpp b/src/run.cpp
index 455f5b07a8..62b888114f 100644
--- a/src/run.cpp
+++ b/src/run.cpp
@@ -171,9 +171,9 @@ void Run::command(int narg, char **arg)
     } else output->setup(0);
 
     timer->init();
-    timer->barrier_start(TIME_LOOP);
+    timer->barrier_start();
     update->integrate->run(nsteps);
-    timer->barrier_stop(TIME_LOOP);
+    timer->barrier_stop();
 
     update->integrate->cleanup();
 
@@ -209,9 +209,9 @@ void Run::command(int narg, char **arg)
       } else output->setup(0);
 
       timer->init();
-      timer->barrier_start(TIME_LOOP);
+      timer->barrier_start();
       update->integrate->run(nsteps);
-      timer->barrier_stop(TIME_LOOP);
+      timer->barrier_stop();
 
       update->integrate->cleanup();
 
diff --git a/src/thermo.cpp b/src/thermo.cpp
index ee7ab0fa76..bef74d0358 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -46,6 +46,9 @@
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
+#include "universe.h"
+
+#include "math_const.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
@@ -335,7 +338,7 @@ void Thermo::compute(int flag)
   int loc = 0;
   if (lineflag == MULTILINE) {
     double cpu;
-    if (flag) cpu = timer->elapsed(TIME_LOOP);
+    if (flag) cpu = timer->elapsed(Timer::TOTAL);
     else cpu = 0.0;
     loc = sprintf(&line[loc],format_multi,ntimestep,cpu);
   }
@@ -1520,7 +1523,7 @@ void Thermo::compute_time()
 void Thermo::compute_cpu()
 {
   if (firststep == 0) dvalue = 0.0;
-  else dvalue = timer->elapsed(TIME_LOOP);
+  else dvalue = timer->elapsed(Timer::TOTAL);  // MPI_Wtime() - wall_array[TOTAL]; 0.0 if timer is off
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1534,7 +1537,7 @@ void Thermo::compute_tpcpu()
     new_cpu = 0.0;
     dvalue = 0.0;
   } else {
-    new_cpu = timer->elapsed(TIME_LOOP);
+    new_cpu = timer->elapsed(Timer::TOTAL);
     double cpu_diff = new_cpu - last_tpcpu;
     double time_diff = new_time - last_time;
     if (time_diff > 0.0 && cpu_diff > 0.0) dvalue = time_diff/cpu_diff;
@@ -1556,7 +1559,7 @@ void Thermo::compute_spcpu()
     new_cpu = 0.0;
     dvalue = 0.0;
   } else {
-    new_cpu = timer->elapsed(TIME_LOOP);
+    new_cpu = timer->elapsed(Timer::TOTAL);
     double cpu_diff = new_cpu - last_spcpu;
     int step_diff = new_step - last_step;
     if (cpu_diff > 0.0) dvalue = step_diff/cpu_diff;
@@ -1572,7 +1575,7 @@ void Thermo::compute_spcpu()
 void Thermo::compute_cpuremain()
 {
   if (firststep == 0) dvalue = 0.0;
-  else dvalue = timer->elapsed(TIME_LOOP) * 
+  else dvalue = timer->elapsed(Timer::TOTAL) *  // elapsed wall time scaled by steps left / steps done
          (update->laststep - update->ntimestep) /
          (update->ntimestep - update->firststep);
 }
@@ -2093,3 +2096,4 @@ void Thermo::compute_cellgamma()
     dvalue = acos(cosgamma)*180.0/MY_PI;
   }
 }
+
diff --git a/src/timer.cpp b/src/timer.cpp
index 329de00d60..2fb8453684 100644
--- a/src/timer.cpp
+++ b/src/timer.cpp
@@ -12,73 +12,201 @@
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
+#include "string.h"
 #include "timer.h"
+#include "comm.h"
+#include "error.h"
 #include "memory.h"
 
+#ifdef _WIN32
+#include <windows.h>
+#include <stdint.h>
+#else
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif
+
 using namespace LAMMPS_NS;
 
-/* ---------------------------------------------------------------------- */
+// Return the user-mode CPU time consumed by the current process, in
+// seconds, analogous to how MPI_Wtime() returns the wall-clock time.
 
-Timer::Timer(LAMMPS *lmp) : Pointers(lmp)
+static double CPU_Time()
 {
-  memory->create(array,TIME_N,"array");
+  double rv = 0.0;  // stays 0.0 if the OS query below fails
+
+#ifdef _WIN32
+
+  // from the MSDN docs: ut receives user-mode time (kt, kernel time, is unused)
+  FILETIME ct,et,kt,ut;
+  union { FILETIME ft; uint64_t ui; } cpu;
+  if (GetProcessTimes(GetCurrentProcess(),&ct,&et,&kt,&ut)) {
+    cpu.ft = ut;
+    rv = cpu.ui * 0.0000001;  // FILETIME counts 100 ns ticks -> seconds
+  }
+
+#else /* ! _WIN32 */
+
+  struct rusage ru;
+  if (getrusage(RUSAGE_SELF, &ru) == 0) {
+    rv = (double) ru.ru_utime.tv_sec;  // user time only; ru_stime is not added
+    rv += (double) ru.ru_utime.tv_usec * 0.000001;  // microseconds -> seconds
+  }
+
+#endif /* ! _WIN32 */
+
+  return rv;
 }
 
 /* ---------------------------------------------------------------------- */
 
-Timer::~Timer()
+Timer::Timer(LAMMPS *lmp) : Pointers(lmp)
 {
-  memory->destroy(array);
+  _level = NORMAL;  // default level of detail; timing arrays are not zeroed here, see init()
+  _sync  = OFF;  // default: no MPI_Barrier inside _stamp()
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Timer::init()
 {
-  for (int i = 0; i < TIME_N; i++) array[i] = 0.0;
+  for (int i = 0; i < NUM_TIMER; i++) {  // zero every slot, CPU and wall alike
+    cpu_array[i] = 0.0;
+    wall_array[i] = 0.0;
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
-void Timer::stamp()
+void Timer::_stamp(enum ttype which)
 {
-  // uncomment if want synchronized timing
-  // MPI_Barrier(world);
-  previous_time = MPI_Wtime();
+  double current_cpu=0.0, current_wall=0.0;
+
+  if (_level > NORMAL) current_cpu = CPU_Time();  // CPU time is sampled only at level FULL
+  current_wall = MPI_Wtime();
+
+  if ((which > TOTAL) && (which < NUM_TIMER)) {  // RESET, START and TOTAL accumulate nothing
+    const double delta_cpu = current_cpu - previous_cpu;
+    const double delta_wall = current_wall - previous_wall;
+    
+    cpu_array[which]  += delta_cpu;
+    wall_array[which] += delta_wall;
+    cpu_array[ALL]    += delta_cpu;  // ALL collects the sum over every stamped slot
+    wall_array[ALL]   += delta_wall;
+  }
+
+  previous_cpu  = current_cpu;  // the next stamp measures from this point
+  previous_wall = current_wall;
+
+  if (which == RESET) {  // zero all slots, then store the current times in TOTAL
+    this->init();
+    cpu_array[TOTAL] = current_cpu;
+    wall_array[TOTAL] = current_wall;
+  }
+
+  if (_sync) {  // time spent waiting in the barrier goes to SYNC only (not to ALL)
+    MPI_Barrier(world);
+    if (_level > NORMAL) current_cpu = CPU_Time();
+    current_wall = MPI_Wtime();
+
+    cpu_array[SYNC]  += current_cpu - previous_cpu;
+    wall_array[SYNC] += current_wall - previous_wall;
+    previous_cpu  = current_cpu;  // barrier wait is excluded from the next interval
+    previous_wall = current_wall;
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
-void Timer::stamp(int which)
+void Timer::barrier_start()
 {
-  // uncomment if want synchronized timing
-  // MPI_Barrier(world);
-  double current_time = MPI_Wtime();
-  array[which] += current_time - previous_time;
-  previous_time = current_time;
+  double current_cpu=0.0, current_wall=0.0;
+
+  MPI_Barrier(world);  // the barrier is executed even when timing is off
+
+  if (_level < LOOP) return;  // i.e. _level == OFF: record nothing
+
+  current_cpu = CPU_Time();
+  current_wall = MPI_Wtime();
+
+  cpu_array[TOTAL]  = current_cpu;  // TOTAL holds the start time until barrier_stop()
+  wall_array[TOTAL] = current_wall;
+  previous_cpu  = current_cpu;  // reference point for the first _stamp() delta
+  previous_wall = current_wall;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void Timer::barrier_start(int which)
+void Timer::barrier_stop()
 {
+  double current_cpu=0.0, current_wall=0.0;
+
   MPI_Barrier(world);
-  array[which] = MPI_Wtime();
+
+  if (_level < LOOP) return;  // i.e. _level == OFF: TOTAL is left untouched
+
+  current_cpu = CPU_Time();
+  current_wall = MPI_Wtime();
+
+  cpu_array[TOTAL]  = current_cpu - cpu_array[TOTAL];  // start time -> elapsed duration
+  wall_array[TOTAL] = current_wall - wall_array[TOTAL];
 }
 
 /* ---------------------------------------------------------------------- */
 
-void Timer::barrier_stop(int which)
+double Timer::cpu(enum ttype which)
 {
-  MPI_Barrier(world);
-  double current_time = MPI_Wtime();
-  array[which] = current_time - array[which];
+  double current_cpu = CPU_Time();  // NOTE(review): no _level == OFF guard, unlike elapsed() - confirm intended
+  return (current_cpu - cpu_array[which]);  // the slot value is treated as a start time
 }
 
 /* ---------------------------------------------------------------------- */
 
-double Timer::elapsed(int which)
+double Timer::elapsed(enum ttype which)
+{
+  if (_level == OFF) return 0.0;  // nothing is recorded when the timer is off
+  double current_wall = MPI_Wtime();
+  return (current_wall - wall_array[which]);  // the slot value is treated as a start time
+}
+
+/* ---------------------------------------------------------------------- */
+
+void Timer::set_wall(enum ttype which, double newtime)
+{
+  wall_array[which] = newtime;  // overwrite, not accumulate; which is not range-checked
+}
+
+/* ----------------------------------------------------------------------
+   set timer style (off/loop/normal/full) and mode (nosync/sync) from args
+------------------------------------------------------------------------- */
+static const char *timer_style[] = { "off", "loop", "normal", "full" };
+static const char *timer_mode[]  = { "nosync", "(dummy)", "sync" };  // indexed by OFF=0 / NORMAL=2, so slot 1 is a filler
+static const char  timer_fmt[]   = "New timer settings: style=%s  mode=%s\n";
+
+void Timer::modify_params(int narg, char **arg)
 {
-  double current_time = MPI_Wtime();
-  return (current_time - array[which]);
+  int iarg = 0;
+  while (iarg < narg) {  // each argument is a single keyword; a later one overrides an earlier one
+    if (strcmp(arg[iarg],timer_style[OFF])           == 0) {
+      _level = OFF;
+    } else if (strcmp(arg[iarg],timer_style[LOOP]) == 0) {
+      _level = LOOP;
+    } else if (strcmp(arg[iarg],timer_style[NORMAL]) == 0) {
+      _level = NORMAL;
+    } else if (strcmp(arg[iarg],timer_style[FULL])   == 0) {
+      _level = FULL;
+    } else if (strcmp(arg[iarg],timer_mode[OFF])     == 0) {
+      _sync  = OFF;
+    } else if (strcmp(arg[iarg],timer_mode[NORMAL])  == 0) {
+      _sync  = NORMAL;
+    } else error->all(FLERR,"Illegal timers command");
+    ++iarg;  
+  }
+
+  if (comm->me == 0) {  // report the resulting settings once, from rank 0 only
+    if (screen)
+      fprintf(screen,timer_fmt,timer_style[_level],timer_mode[_sync]);
+    if (logfile)
+      fprintf(logfile,timer_fmt,timer_style[_level],timer_mode[_sync]);
+  }
 }
diff --git a/src/timer.h b/src/timer.h
index d62b764c49..adf554f468 100644
--- a/src/timer.h
+++ b/src/timer.h
@@ -16,26 +16,60 @@
 
 #include "pointers.h"
 
-enum{TIME_LOOP,TIME_PAIR,TIME_BOND,TIME_KSPACE,TIME_NEIGHBOR,
-     TIME_COMM,TIME_OUTPUT,TIME_N};
 
 namespace LAMMPS_NS {
 
 class Timer : protected Pointers {
  public:
-  double *array;
+
+  enum ttype  {RESET=-2,START=-1,TOTAL=0,PAIR,BOND,KSPACE,NEIGH,COMM,
+               MODIFY,OUTPUT,SYNC,ALL,DEPHASE,DYNAMICS,QUENCH,NEB,REPCOMM,
+               REPOUT,NUM_TIMER};
+  enum tlevel {OFF=0,LOOP,NORMAL,FULL};  // ordered: the has_*() accessors compare with >=
 
   Timer(class LAMMPS *);
-  ~Timer();
+  ~Timer() {};
   void init();
-  void stamp();
-  void stamp(int);
-  void barrier_start(int);
-  void barrier_stop(int);
-  double elapsed(int);
+
+  // inline wrapper: _stamp() is skipped entirely unless _level > LOOP
+
+  void stamp(enum ttype which=START) {
+    if (_level > LOOP) _stamp(which);
+  }
+  
+  void barrier_start();
+  void barrier_stop();
+
+  // accessor methods for supported level of detail
+
+  bool has_loop()   const { return (_level >= LOOP); }
+  bool has_normal() const { return (_level >= NORMAL); }
+  bool has_full()   const { return (_level >= FULL); }
+  bool has_sync()   const { return (_sync  != OFF); }
+
+  double elapsed(enum ttype);
+  double cpu(enum ttype);
+
+  double get_cpu(enum ttype which) const {
+    return cpu_array[which]; };
+  double get_wall(enum ttype which) const {
+    return wall_array[which]; };
+
+  void set_wall(enum ttype, double);
+
+
+  void modify_params(int, char **);
 
  private:
-  double previous_time;
+  double cpu_array[NUM_TIMER];  // CPU time per ttype slot
+  double wall_array[NUM_TIMER];  // wall time per ttype slot
+  double previous_cpu;  // CPU time recorded by the last _stamp()/barrier_start()
+  double previous_wall;  // wall time recorded by the last _stamp()/barrier_start()
+  int _level;  // level of detail: off=0,loop=1,normal=2,full=3
+  int _sync;   // if nonzero, _stamp() ends with an MPI_Barrier (wait charged to SYNC)
+
+  // add the time since the previous stamp to the requested slot (and to ALL)
+  void _stamp(enum ttype);
 };
 
 }
diff --git a/src/verlet.cpp b/src/verlet.cpp
index b052a37bc0..345549d914 100644
--- a/src/verlet.cpp
+++ b/src/verlet.cpp
@@ -231,8 +231,10 @@ void Verlet::run(int n)
 
     // initial time integration
 
+    timer->stamp();
     modify->initial_integrate(vflag);
     if (n_post_integrate) modify->post_integrate();
+    timer->stamp(Timer::MODIFY);
 
     // regular communication vs neighbor list rebuild
 
@@ -241,9 +243,13 @@ void Verlet::run(int n)
     if (nflag == 0) {
       timer->stamp();
       comm->forward_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     } else {
-      if (n_pre_exchange) modify->pre_exchange();
+      if (n_pre_exchange) {
+        timer->stamp();
+        modify->pre_exchange();
+        timer->stamp(Timer::MODIFY);
+      }
       if (triclinic) domain->x2lamda(atom->nlocal);
       domain->pbc();
       if (domain->box_change) {
@@ -256,10 +262,13 @@ void Verlet::run(int n)
       if (sortflag && ntimestep >= atom->nextsort) atom->sort();
       comm->borders();
       if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
-      timer->stamp(TIME_COMM);
-      if (n_pre_neighbor) modify->pre_neighbor();
+      timer->stamp(Timer::COMM);
+      if (n_pre_neighbor) {
+        modify->pre_neighbor();
+        timer->stamp(Timer::MODIFY);
+      }
       neighbor->build();
-      timer->stamp(TIME_NEIGHBOR);
+      timer->stamp(Timer::NEIGH);
     }
 
     // force computations
@@ -268,13 +277,17 @@ void Verlet::run(int n)
     // and Pair:ev_tally() needs to be called before any tallying
 
     force_clear();
-    if (n_pre_force) modify->pre_force(vflag);
 
     timer->stamp();
 
+    if (n_pre_force) {
+      modify->pre_force(vflag);
+      timer->stamp(Timer::MODIFY);
+    }
+
     if (pair_compute_flag) {
       force->pair->compute(eflag,vflag);
-      timer->stamp(TIME_PAIR);
+      timer->stamp(Timer::PAIR);
     }
 
     if (atom->molecular) {
@@ -282,19 +295,19 @@ void Verlet::run(int n)
       if (force->angle) force->angle->compute(eflag,vflag);
       if (force->dihedral) force->dihedral->compute(eflag,vflag);
       if (force->improper) force->improper->compute(eflag,vflag);
-      timer->stamp(TIME_BOND);
+      timer->stamp(Timer::BOND);
     }
 
     if (kspace_compute_flag) {
       force->kspace->compute(eflag,vflag);
-      timer->stamp(TIME_KSPACE);
+      timer->stamp(Timer::KSPACE);
     }
 
     // reverse communication of forces
 
     if (force->newton) {
       comm->reverse_comm();
-      timer->stamp(TIME_COMM);
+      timer->stamp(Timer::COMM);
     }
 
     // force modifications, final time integration, diagnostics
@@ -302,13 +315,14 @@ void Verlet::run(int n)
     if (n_post_force) modify->post_force(vflag);
     modify->final_integrate();
     if (n_end_of_step) modify->end_of_step();
+    timer->stamp(Timer::MODIFY);
 
     // all output
 
     if (ntimestep == output->next) {
       timer->stamp();
       output->write(ntimestep);
-      timer->stamp(TIME_OUTPUT);
+      timer->stamp(Timer::OUTPUT);
     }
   }
 }
@@ -329,7 +343,6 @@ void Verlet::cleanup()
 
 void Verlet::force_clear()
 {
-  int i;
   size_t nbytes;
 
   if (external_force_clear) return;
-- 
GitLab