From 389584998643627304e1a0c4a9140f7325e898ab Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Thu, 1 Dec 2011 17:05:22 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7264
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/USER-OMP/Install.sh                       |   19 +-
 src/USER-OMP/Package.sh                       |   38 +-
 src/USER-OMP/dihedral_charmm_omp.cpp          |   53 +-
 src/USER-OMP/dihedral_charmm_omp.h            |    6 +-
 src/USER-OMP/dihedral_class2_omp.cpp          |   47 +-
 src/USER-OMP/dihedral_class2_omp.h            |    4 +-
 .../dihedral_cosine_shift_exp_omp.cpp         |   41 +-
 src/USER-OMP/dihedral_cosine_shift_exp_omp.h  |    4 +-
 src/USER-OMP/dihedral_harmonic_omp.cpp        |   40 +-
 src/USER-OMP/dihedral_harmonic_omp.h          |    6 +-
 src/USER-OMP/dihedral_helix_omp.cpp           |   53 +-
 src/USER-OMP/dihedral_helix_omp.h             |    4 +-
 src/USER-OMP/dihedral_multi_harmonic_omp.cpp  |   39 +-
 src/USER-OMP/dihedral_multi_harmonic_omp.h    |    4 +-
 src/USER-OMP/dihedral_opls_omp.cpp            |   41 +-
 src/USER-OMP/dihedral_opls_omp.h              |    4 +-
 src/USER-OMP/fix_nve_sphere_omp.cpp           |   46 +-
 src/USER-OMP/fix_shear_history_omp.cpp        |    2 +-
 src/USER-OMP/pair_adp_omp.cpp                 |   76 +-
 src/USER-OMP/pair_adp_omp.h                   |    3 +-
 src/USER-OMP/pair_born_coul_long_omp.cpp      |   40 +-
 src/USER-OMP/pair_born_coul_long_omp.h        |    2 +-
 src/USER-OMP/pair_born_omp.cpp                |   36 +-
 src/USER-OMP/pair_born_omp.h                  |    2 +-
 src/USER-OMP/pair_buck_coul_cut_omp.cpp       |   39 +-
 src/USER-OMP/pair_buck_coul_cut_omp.h         |    2 +-
 src/USER-OMP/pair_buck_coul_long_omp.cpp      |   37 +-
 src/USER-OMP/pair_buck_coul_long_omp.h        |    2 +-
 src/USER-OMP/pair_buck_coul_omp.cpp           |   42 +-
 src/USER-OMP/pair_buck_coul_omp.h             |    2 +-
 src/USER-OMP/pair_buck_omp.cpp                |   36 +-
 src/USER-OMP/pair_buck_omp.h                  |    2 +-
 src/USER-OMP/pair_cdeam_omp.cpp               |  108 +-
 src/USER-OMP/pair_cdeam_omp.h                 |    3 +-
 src/USER-OMP/pair_colloid_omp.cpp             |   42 +-
 src/USER-OMP/pair_colloid_omp.h               |    2 +-
 src/USER-OMP/pair_coul_cut_omp.cpp            |   46 +-
 src/USER-OMP/pair_coul_cut_omp.h              |    2 +-
 src/USER-OMP/pair_coul_debye_omp.cpp          |   46 +-
 src/USER-OMP/pair_coul_debye_omp.h            |    2 +-
 src/USER-OMP/pair_coul_long_omp.cpp           |   46 +-
 src/USER-OMP/pair_coul_long_omp.h             |    2 +-
 src/USER-OMP/pair_dipole_cut_omp.cpp          |   53 +-
 src/USER-OMP/pair_dipole_cut_omp.h            |    2 +-
 src/USER-OMP/pair_dipole_sf_omp.cpp           |   53 +-
 src/USER-OMP/pair_dipole_sf_omp.h             |    2 +-
 src/USER-OMP/pair_dpd_omp.cpp                 |   54 +-
 src/USER-OMP/pair_dpd_omp.h                   |    2 +-
 src/USER-OMP/pair_dpd_tstat_omp.cpp           |   52 +-
 src/USER-OMP/pair_dpd_tstat_omp.h             |    2 +-
 src/USER-OMP/pair_eam_omp.cpp                 |   65 +-
 src/USER-OMP/pair_eam_omp.h                   |    2 +-
 src/USER-OMP/pair_edip_omp.cpp                |   44 +-
 src/USER-OMP/pair_edip_omp.h                  |    2 +-
 src/USER-OMP/pair_eim_omp.cpp                 |   82 +-
 src/USER-OMP/pair_eim_omp.h                   |    2 +-
 src/USER-OMP/pair_gauss_omp.cpp               |   45 +-
 src/USER-OMP/pair_gauss_omp.h                 |    2 +-
 src/USER-OMP/pair_gayberne_omp.cpp            |   66 +-
 src/USER-OMP/pair_gayberne_omp.h              |    2 +-
 src/USER-OMP/pair_gran_hertz_history_omp.cpp  |   51 +-
 src/USER-OMP/pair_gran_hertz_history_omp.h    |    2 +-
 src/USER-OMP/pair_gran_hooke_history_omp.cpp  |   93 +-
 src/USER-OMP/pair_gran_hooke_history_omp.h    |    2 +-
 src/USER-OMP/pair_gran_hooke_omp.cpp          |   50 +-
 src/USER-OMP/pair_gran_hooke_omp.h            |    2 +-
 src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp   |   53 +-
 src/USER-OMP/pair_hbond_dreiding_lj_omp.h     |    2 +-
 .../pair_hbond_dreiding_morse_omp.cpp         |   53 +-
 src/USER-OMP/pair_hbond_dreiding_morse_omp.h  |    2 +-
 src/USER-OMP/pair_lj96_cut_omp.cpp            |   44 +-
 src/USER-OMP/pair_lj96_cut_omp.h              |    2 +-
 ...air_lj_charmm_coul_charmm_implicit_omp.cpp |   53 +-
 .../pair_lj_charmm_coul_charmm_implicit_omp.h |    2 +-
 .../pair_lj_charmm_coul_charmm_omp.cpp        |   53 +-
 src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h |    2 +-
 src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp |   48 +-
 src/USER-OMP/pair_lj_charmm_coul_long_omp.h   |    2 +-
 src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp  |   50 +-
 src/USER-OMP/pair_lj_class2_coul_cut_omp.h    |    2 +-
 src/USER-OMP/pair_lj_class2_coul_long_omp.cpp |   48 +-
 src/USER-OMP/pair_lj_class2_coul_long_omp.h   |    2 +-
 src/USER-OMP/pair_lj_class2_omp.cpp           |   44 +-
 src/USER-OMP/pair_lj_class2_omp.h             |    2 +-
 src/USER-OMP/pair_lj_coul_omp.cpp             |   52 +-
 src/USER-OMP/pair_lj_coul_omp.h               |    2 +-
 src/USER-OMP/pair_lj_cubic_omp.cpp            |   44 +-
 src/USER-OMP/pair_lj_cubic_omp.h              |    2 +-
 src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp     |   52 +-
 src/USER-OMP/pair_lj_cut_coul_cut_omp.h       |    2 +-
 src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp   |   50 +-
 src/USER-OMP/pair_lj_cut_coul_debye_omp.h     |    2 +-
 src/USER-OMP/pair_lj_cut_coul_long_omp.cpp    |   50 +-
 src/USER-OMP/pair_lj_cut_coul_long_omp.h      |    2 +-
 .../pair_lj_cut_coul_long_tip4p_omp.cpp       |   52 +-
 .../pair_lj_cut_coul_long_tip4p_omp.h         |    3 +-
 src/USER-OMP/pair_lj_cut_omp.cpp              |   44 +-
 src/USER-OMP/pair_lj_cut_omp.h                |    2 +-
 src/USER-OMP/pair_lj_expand_omp.cpp           |   44 +-
 src/USER-OMP/pair_lj_expand_omp.h             |    2 +-
 .../pair_lj_gromacs_coul_gromacs_omp.cpp      |   48 +-
 .../pair_lj_gromacs_coul_gromacs_omp.h        |    2 +-
 src/USER-OMP/pair_lj_gromacs_omp.cpp          |   44 +-
 src/USER-OMP/pair_lj_gromacs_omp.h            |    2 +-
 src/USER-OMP/pair_lj_sf_omp.cpp               |   44 +-
 src/USER-OMP/pair_lj_sf_omp.h                 |    2 +-
 src/USER-OMP/pair_lj_smooth_omp.cpp           |   44 +-
 src/USER-OMP/pair_lj_smooth_omp.h             |    2 +-
 src/USER-OMP/pair_morse_omp.cpp               |   44 +-
 src/USER-OMP/pair_morse_omp.h                 |    2 +-
 src/USER-OMP/pair_peri_lps_omp.cpp            |   59 +-
 src/USER-OMP/pair_peri_lps_omp.h              |    2 +-
 src/USER-OMP/pair_peri_pmb_omp.cpp            |   51 +-
 src/USER-OMP/pair_peri_pmb_omp.h              |    2 +-
 src/USER-OMP/pair_resquared_omp.cpp           |   73 +-
 src/USER-OMP/pair_resquared_omp.h             |    2 +-
 src/USER-OMP/pair_soft_omp.cpp                |   44 +-
 src/USER-OMP/pair_soft_omp.h                  |    2 +-
 src/USER-OMP/pair_sw_omp.cpp                  |   40 +-
 src/USER-OMP/pair_sw_omp.h                    |    2 +-
 src/USER-OMP/pair_table_omp.cpp               |   44 +-
 src/USER-OMP/pair_table_omp.h                 |    2 +-
 src/USER-OMP/pair_tersoff_omp.cpp             |   44 +-
 src/USER-OMP/pair_tersoff_omp.h               |    2 +-
 src/USER-OMP/pair_yukawa_colloid_omp.cpp      |   44 +-
 src/USER-OMP/pair_yukawa_colloid_omp.h        |    2 +-
 src/USER-OMP/pair_yukawa_omp.cpp              |   44 +-
 src/USER-OMP/pair_yukawa_omp.h                |    2 +-
 src/USER-OMP/thr_omp.cpp                      | 1401 ++++++++++-------
 src/USER-OMP/thr_omp.h                        |  152 +-
 130 files changed, 2525 insertions(+), 2395 deletions(-)

diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh
index db0beb5218..ec6fac4b92 100644
--- a/src/USER-OMP/Install.sh
+++ b/src/USER-OMP/Install.sh
@@ -1,10 +1,11 @@
 # Install/unInstall package files in LAMMPS
 # do not install child files if parent does not exist
 
-for file in *_omp.cpp *_omp.h; do
+for file in *_omp.cpp *_omp.h  pppm*proxy.h pppm*proxy.cpp; do
     # let us see if the "rain man" can count the toothpicks...
-   ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
-
+   ofile=`echo $file | sed  -e s,_pppm_tip4p_omp,_long_tip4p_omp, \
+   -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \
+   -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
   if (test $1 = 1) then
     if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
       :  # always install those files.
@@ -18,3 +19,15 @@ for file in *_omp.cpp *_omp.h; do
     rm -f ../$file
   fi
 done
+
+if (test $1 = 1) then
+
+  cp thr_data.h ..
+  cp thr_data.cpp ..
+
+elif (test $1 = 0) then
+
+  rm -f ../thr_data.h
+  rm -f ../thr_data.cpp
+
+fi
diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh
index 5a004c9187..6f577b2791 100644
--- a/src/USER-OMP/Package.sh
+++ b/src/USER-OMP/Package.sh
@@ -1,22 +1,40 @@
 # Update package files in LAMMPS
-# cp package file to src if doesn't exist or is different
-# do not copy certain files if non-OMP versions do not exist
-# do remove OpenMP style files that have no matching
-#   non-OpenMP version installed, e.g. after a package has been removed
-
-for file in *_omp.cpp *_omp.h; do
+# copy package file to src if it doesn't exist or is different
+# do not copy OpenMP style files if a non-OpenMP version does
+# not exist. Do remove OpenMP style files that have no matching
+# non-OpenMP version installed, e.g. after a package has been
+# removed
+for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp thr_data.h thr_data.cpp; do
   # let us see if the "rain man" can count the toothpicks...
-  ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
-  if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
-    :  # always check for those files.
+   ofile=`echo $file | sed  -e s,_pppm_tip4p_omp,_long_tip4p_omp, \
+   -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \
+   -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
+  if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \
+      || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then
+    if (test ! -e ../$file) then
+      echo "  creating src/$file"
+      cp $file ..
+    elif ! cmp -s $file ../$file ; then
+      echo "  updating src/$file"
+      cp $file ..
+    fi
   elif (test ! -e ../$ofile) then
     if (test -e ../$file) then
       echo "  removing src/$file"
       rm -f ../$file
     fi
-    continue
+  else
+    if (test ! -e ../$file) then
+      echo "  creating src/$file"
+      cp $file ..
+    elif ! cmp -s $file ../$file ; then
+      echo "  updating src/$file"
+      cp $file ..
+    fi
   fi
+done
 
+for file in thr_data.h thr_data.cpp; do
   if (test ! -e ../$file) then
     echo "  creating src/$file"
     cp $file ..
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp
index 63bfc43270..b4d7e2e4ad 100644
--- a/src/USER-OMP/dihedral_charmm_omp.cpp
+++ b/src/USER-OMP/dihedral_charmm_omp.cpp
@@ -40,7 +40,6 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   // insure pair->ev_tally() will use 1-4 virial contribution
@@ -53,43 +52,34 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
-  // reduce contributions to non-bonded energy terms
-  for (int n = 0; n < nthreads; ++n) {
-    force->pair->eng_vdwl += eng_vdwl_thr[n];
-    force->pair->eng_coul += eng_coul_thr[n];
-  }
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralCharmmOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,m,n,type;
@@ -105,12 +95,13 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *atomtype = atom->type;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const atomtype = atom->type;
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const double qqrd2e = force->qqrd2e;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -176,7 +167,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -282,7 +273,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
     // 1-4 LJ and Coulomb interactions
     // tally energy/virial in pair, using newton_bond as newton flag
 
@@ -321,7 +312,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND,
-			       evdwl,ecoul,fpair,delx,dely,delz,tid);
+			       evdwl,ecoul,fpair,delx,dely,delz,thr);
     }
   }
 }
diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h
index a39ad83f7e..75ba6410d5 100644
--- a/src/USER-OMP/dihedral_charmm_omp.h
+++ b/src/USER-OMP/dihedral_charmm_omp.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
@@ -33,13 +33,13 @@ class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP {
 
  public:
     DihedralCharmmOMP(class LAMMPS *lmp) : 
-      DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralCharmm(lmp), ThrOMP(lmp,THR_DIHEDRAL|THR_CHARMM) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp
index 7348296644..07e0fba6e1 100644
--- a/src/USER-OMP/dihedral_class2_omp.cpp
+++ b/src/USER-OMP/dihedral_class2_omp.cpp
@@ -39,7 +39,6 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,j,k,n,type;
@@ -96,9 +92,10 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -170,7 +167,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     sc2 = sqrt(sin2);
     if (sc2 < SMALL) sc2 = SMALL;
     sc2 = 1.0/sc2;
-          
+
     s1 = sc1 * sc1;
     s2 = sc2 * sc2;
     s12 = sc1 * sc2;
@@ -179,12 +176,12 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     // error check
 
     if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
-      int me;
-      MPI_Comm_rank(world,&me);
+      int me = comm->me;
+
       if (screen) {
 	char str[128];
-	sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d",
-		me,update->ntimestep,
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -526,7 +523,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,
 		   fabcd[0],fabcd[2],fabcd[3],
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h
index d26f2f8713..14a6c40edd 100644
--- a/src/USER-OMP/dihedral_class2_omp.h
+++ b/src/USER-OMP/dihedral_class2_omp.h
@@ -33,13 +33,13 @@ class DihedralClass2OMP : public DihedralClass2, public ThrOMP {
 
  public:
     DihedralClass2OMP(class LAMMPS *lmp) : 
-      DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralClass2(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
index a6c027e92d..1a80e8a7cd 100644
--- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
@@ -39,7 +39,6 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -91,9 +87,10 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -159,7 +156,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -172,7 +169,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -257,7 +254,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
index eb906ab953..54627c169b 100644
--- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
@@ -33,13 +33,13 @@ class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP {
 
  public:
     DihedralCosineShiftExpOMP(class LAMMPS *lmp) : 
-      DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralCosineShiftExp(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp
index 0fa24090a7..cdad9b6ab8 100644
--- a/src/USER-OMP/dihedral_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_harmonic_omp.cpp
@@ -39,7 +39,6 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,m,n,type;
@@ -90,9 +86,10 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -158,7 +155,7 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -264,7 +261,6 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
-
diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h
index 2d7bae64ee..8b8562ad90 100644
--- a/src/USER-OMP/dihedral_harmonic_omp.h
+++ b/src/USER-OMP/dihedral_harmonic_omp.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
@@ -33,13 +33,13 @@ class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP {
 
  public:
     DihedralHarmonicOMP(class LAMMPS *lmp) : 
-      DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp
index 4ec701a0cb..b9b61982f9 100644
--- a/src/USER-OMP/dihedral_helix_omp.cpp
+++ b/src/USER-OMP/dihedral_helix_omp.cpp
@@ -42,7 +42,6 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -50,37 +49,34 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralHelixOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -94,9 +90,10 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -132,18 +129,18 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
     domain->minimum_image(vb3x,vb3y,vb3z);
     
     // c0 calculation
-        
+
     sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
     sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
     sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
-        
+
     rb1 = sqrt(sb1);
     rb3 = sqrt(sb3);
-        
+
     c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
 
     // 1st and 2nd angle
-        
+
     b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
     b1mag = sqrt(b1mag2);
     b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
@@ -181,15 +178,16 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
     cz = vb1x*vb2y - vb1y*vb2x;
     cmag = sqrt(cx*cx + cy*cy + cz*cz);
     dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
-    
+
     // error check
 
     if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
       int me = comm->me;
+
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -202,7 +200,7 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -217,7 +215,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) +
 		 cphi[type]*(1.0 + cos(phi + MY_PI4));
-;
 
     a = pd;
     c = c * a;
@@ -277,6 +274,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h
index 7923197413..e932045cff 100644
--- a/src/USER-OMP/dihedral_helix_omp.h
+++ b/src/USER-OMP/dihedral_helix_omp.h
@@ -33,13 +33,13 @@ class DihedralHelixOMP : public DihedralHelix, public ThrOMP {
 
  public:
     DihedralHelixOMP(class LAMMPS *lmp) : 
-      DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralHelix(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
index bde958984e..822ddb7965 100644
--- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
@@ -39,7 +39,6 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralMultiHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -91,9 +87,10 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -180,7 +177,7 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -264,6 +261,6 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h
index da2322f038..628ad2a6a0 100644
--- a/src/USER-OMP/dihedral_multi_harmonic_omp.h
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h
@@ -33,13 +33,13 @@ class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP {
 
  public:
     DihedralMultiHarmonicOMP(class LAMMPS *lmp) : 
-      DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralMultiHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp
index 9f59e26d26..6e46575f3b 100644
--- a/src/USER-OMP/dihedral_opls_omp.cpp
+++ b/src/USER-OMP/dihedral_opls_omp.cpp
@@ -40,7 +40,6 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -48,37 +47,34 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralOPLSOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -92,9 +88,10 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -188,7 +185,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -201,7 +198,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -280,7 +277,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h
index 58b9920538..44c76bb2ad 100644
--- a/src/USER-OMP/dihedral_opls_omp.h
+++ b/src/USER-OMP/dihedral_opls_omp.h
@@ -33,13 +33,13 @@ class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP {
 
  public:
     DihedralOPLSOMP(class LAMMPS *lmp) : 
-      DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralOPLS(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp
index a642b21f22..93af055f81 100644
--- a/src/USER-OMP/fix_nve_sphere_omp.cpp
+++ b/src/USER-OMP/fix_nve_sphere_omp.cpp
@@ -34,26 +34,24 @@ enum{NONE,DIPOLE};
 
 void FixNVESphereOMP::initial_integrate(int vflag)
 {
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **omega = atom->omega;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  const double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  const double * const * const torque = atom->torque;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const int * const mask = atom->mask;
+  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
   int i;
   
-  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
-
   // set timestep here since dt may have changed or come via rRESPA
   const double dtfrotate = dtf / INERTIA;
 
   // update v,x,omega for all particles
   // d_omega/dt = torque / inertia
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
   for (i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
@@ -77,9 +75,9 @@ void FixNVESphereOMP::initial_integrate(int vflag)
   // renormalize mu to dipole length
 
   if (extra == DIPOLE) {
-    double **mu = atom->mu;
+    double * const * const mu = atom->mu;
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
     for (i = 0; i < nlocal; i++) { 
       double g0,g1,g2,msq,scale;
@@ -103,18 +101,16 @@ void FixNVESphereOMP::initial_integrate(int vflag)
 
 void FixNVESphereOMP::final_integrate()
 {
-  double **v = atom->v;
-  double **f = atom->f;
-  double **omega = atom->omega;
-  double **torque = atom->torque;
-  double *rmass = atom->rmass;
-  double *radius = atom->radius;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  double * const * const v = atom->v;
+  const double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  const double * const * const torque = atom->torque;
+  const double * const rmass = atom->rmass;
+  const double * const radius = atom->radius;
+  const int * const mask = atom->mask;
+  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
   int i;
   
-  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
-
   // set timestep here since dt may have changed or come via rRESPA
 
   const double dtfrotate = dtf / INERTIA;
@@ -123,7 +119,7 @@ void FixNVESphereOMP::final_integrate()
   // d_omega/dt = torque / inertia
 
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
   for (i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp
index 40781cb407..4655dd1af7 100644
--- a/src/USER-OMP/fix_shear_history_omp.cpp
+++ b/src/USER-OMP/fix_shear_history_omp.cpp
@@ -47,7 +47,7 @@ void FixShearHistoryOMP::pre_exchange()
 
   int flag = 0;
 #if defined(_OPENMP)
-#pragma omp parallel shared(flag)
+#pragma omp parallel default(none) shared(flag)
 #endif
   {
 
diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
index e91642e6ba..3af4a2f7cd 100644
--- a/src/USER-OMP/pair_adp_omp.cpp
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairADPOMP::PairADPOMP(LAMMPS *lmp) :
-  PairADP(lmp), ThrOMP(lmp, PAIR)
+  PairADP(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,10 +39,10 @@ void PairADPOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
-  const int nall = atom->nlocal + atom->nghost;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
@@ -62,48 +62,39 @@ void PairADPOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, **mu_t, **lambda_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      mu_t = mu + tid*nall;
-      lambda_t = lambda + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      mu_t = mu + tid*atom->nlocal;
-      lambda_t = lambda + tid*atom->nlocal;
-    }
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair)
+      thr->init_adp(nall, rho, mu, lambda);
+    else
+      thr->init_adp(nlocal, rho, mu, lambda);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, 
-		      double **lambda_t, int iifrom, int iito, int tid)
+void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -117,7 +108,13 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  double * const * const mu_t = thr->get_mu();
+  double * const * const lambda_t = thr->get_lambda();
+  const int tid = thr->get_tid();
+
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
@@ -128,18 +125,6 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double));
-    memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double));
-    memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double));
-  }
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -259,8 +244,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 		  lambda[i][4]+lambda[i][5]*lambda[i][5]);
       phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])*
 	(lambda[i][0]+lambda[i][1]+lambda[i][2]);
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this,i,i,nlocal,/* newton_pair */ 1, phi, 0.0, thr);
     }
   }
 
@@ -384,7 +368,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 
 	if (EFLAG) evdwl = phi;
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				     fx,fy,fz,delx,dely,delz,tid);
+				     fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -399,6 +383,6 @@ double PairADPOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
   bytes += PairADP::memory_usage();
-
+  bytes += (comm->nthreads-1) * nmax * (10*sizeof(double) + 3*sizeof(double *));
   return bytes;
 }
diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h
index f7d2509cd3..9a7f4023fb 100644
--- a/src/USER-OMP/pair_adp_omp.h
+++ b/src/USER-OMP/pair_adp_omp.h
@@ -39,8 +39,7 @@ class PairADPOMP : public PairADP, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, double **mu_t, double **lambda_t, 
-	    int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp
index c277a080c0..cf409f3cfc 100644
--- a/src/USER-OMP/pair_born_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) :
-  PairBornCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairBornCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,9 +90,10 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
@@ -179,7 +175,7 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h
index d6ccbfc680..3271c566a4 100644
--- a/src/USER-OMP/pair_born_coul_long_omp.h
+++ b/src/USER-OMP/pair_born_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp
index c39d205c97..d9dbf0d29e 100644
--- a/src/USER-OMP/pair_born_omp.cpp
+++ b/src/USER-OMP/pair_born_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBornOMP::PairBornOMP(LAMMPS *lmp) :
-  PairBorn(lmp), ThrOMP(lmp, PAIR)
+  PairBorn(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBornOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairBornOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,7 +79,8 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
@@ -143,7 +139,7 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h
index b24de4a577..7260644728 100644
--- a/src/USER-OMP/pair_born_omp.h
+++ b/src/USER-OMP/pair_born_omp.h
@@ -39,7 +39,7 @@ class PairBornOMP : public PairBorn, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
index ac47d478a0..235f1c4f2c 100644
--- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) :
-  PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,35 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
-
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -86,8 +80,9 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
@@ -162,7 +157,7 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h
index a77f3bad24..8fee0808c0 100644
--- a/src/USER-OMP/pair_buck_coul_cut_omp.h
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp
index 6e7398ca44..083b9acc6e 100644
--- a/src/USER-OMP/pair_buck_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) :
-  PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,37 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
     // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,8 +91,9 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
@@ -178,7 +175,7 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h
index 2c87904de8..a47e809eec 100644
--- a/src/USER-OMP/pair_buck_coul_long_omp.h
+++ b/src/USER-OMP/pair_buck_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp
index bd171f628a..97299feeeb 100644
--- a/src/USER-OMP/pair_buck_coul_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_omp.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) :
-  PairBuckCoul(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoul(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -45,7 +45,6 @@ void PairBuckCoulOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -53,53 +52,50 @@ void PairBuckCoulOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
-  double *x0 = x[0];
+  const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
   int *ilist = list->ilist;
@@ -129,7 +125,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       ni = sbmask(j);
       j &= NEIGHMASK;
       
-      { register double *xj = x0+(j+(j<<1));
+      { const register double *xj = x0+(j+(j<<1));
 	d[0] = xi[0] - xj[0];				// pair vector
 	d[1] = xi[1] - xj[1];
 	d[2] = xi[2] - xj[2]; }
@@ -214,7 +210,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h
index dbff9b419a..823f64a4ab 100644
--- a/src/USER-OMP/pair_buck_coul_omp.h
+++ b/src/USER-OMP/pair_buck_coul_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp
index 66d8730abd..5806a3e796 100644
--- a/src/USER-OMP/pair_buck_omp.cpp
+++ b/src/USER-OMP/pair_buck_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckOMP::PairBuckOMP(LAMMPS *lmp) :
-  PairBuck(lmp), ThrOMP(lmp, PAIR)
+  PairBuck(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBuckOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairBuckOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,7 +79,8 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
@@ -145,7 +141,7 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h
index 40b6702e6f..c73e3f0d08 100644
--- a/src/USER-OMP/pair_buck_omp.h
+++ b/src/USER-OMP/pair_buck_omp.h
@@ -39,7 +39,7 @@ class PairBuckOMP : public PairBuck, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp
index 01bd5f6eaa..287b39ceb1 100644
--- a/src/USER-OMP/pair_cdeam_omp.cpp
+++ b/src/USER-OMP/pair_cdeam_omp.cpp
@@ -44,7 +44,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) :
-  PairCDEAM(lmp,_cdeamVersion), PairEAM(lmp), ThrOMP(lmp, PAIR)
+  PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -55,7 +55,6 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -78,22 +77,19 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, *rhoB_t, *D_values_t;
-
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      rhoB_t = rhoB + tid*nall;
-      D_values_t = D_values + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      rhoB_t = rhoB + tid*atom->nlocal;
-      D_values_t = D_values + tid*atom->nlocal;
-    }
+ 
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair)
+      thr->init_cdeam(nall, rho, rhoB, D_values);
+    else
+      thr->init_cdeam(atom->nlocal, rho, rhoB, D_values);
 
     switch (cdeamVersion) {
 
@@ -101,15 +97,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
   
       if (evflag) {
 	if (eflag) {
-	  if (force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0,1>(ifrom, ito, thr);
 	} else {
-	  if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr);
+	  else eval<1,0,0,1>(ifrom, ito, thr);
 	}
       } else {
-	if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr);
+	else eval<0,0,0,1>(ifrom, ito, thr);
       }
       break;
 
@@ -117,15 +113,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
 
       if (evflag) {
 	if (eflag) {
-	  if (force->newton_pair) eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr);
+	  else eval<1,1,0,2>(ifrom, ito, thr);
 	} else {
-	  if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr);
+	  else eval<1,0,0,2>(ifrom, ito, thr);
 	}
       } else {
-	if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr);
+	else eval<0,0,0,2>(ifrom, ito, thr);
       }
       break;
 
@@ -136,18 +132,12 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
     error->all(FLERR,"unsupported eam/cd pair style variant");
     }
     
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
-void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, 
-		      double *D_values_t, int iifrom, int iito, int tid)
+void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -156,10 +146,17 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  double * const rhoB_t = thr->get_rhoB();
+  double * const D_values_t = thr->get_D_values();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
 
   double fxtmp,fytmp,fztmp;
 
@@ -167,18 +164,6 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(rhoB_t, 0, nall*sizeof(double));
-    memset(D_values_t, 0, nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(rhoB_t, 0, nlocal*sizeof(double));
-    memset(D_values_t, 0, nlocal*sizeof(double));
-  }
-
   // Stage I
 
   // Compute rho and rhoB at each local atom site.
@@ -240,10 +225,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
-    data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);
+    data_reduce_thr(rhoB, nall, nthreads, 1, tid);
     if (CDEAMVERSION==1)
-      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -259,10 +244,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
   
   } else {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
-    data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);
+    data_reduce_thr(rhoB, nlocal, nthreads, 1, tid);
     if (CDEAMVERSION==1)
-      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -277,8 +262,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
     fp[i] = FPrimeOfRho(index, type[i]);
     if(EFLAG) {
       phi = FofRho(index, type[i]);
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
     }
   }
 
@@ -360,7 +344,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
     }
 
     if (NEWTON_PAIR) {
-      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nall, nthreads, 1, tid);
 
       // wait until reduction is complete
       sync_threads();
@@ -375,7 +359,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
       sync_threads();
   
   } else {
-      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -525,7 +509,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
 	if(EFLAG) evdwl = phi;
 	if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				fpair,delx,dely,delz,tid);
+				fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h
index 85b124cb17..46f460f8fa 100644
--- a/src/USER-OMP/pair_cdeam_omp.h
+++ b/src/USER-OMP/pair_cdeam_omp.h
@@ -40,8 +40,7 @@ class PairCDEAMOMP : public PairCDEAM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
-  void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, 
-	    int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
   /// The one-site concentration formulation of CD-EAM.
diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp
index c8bc74407a..7bfe1c04de 100644
--- a/src/USER-OMP/pair_colloid_omp.cpp
+++ b/src/USER-OMP/pair_colloid_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairColloidOMP::PairColloidOMP(LAMMPS *lmp) :
-  PairColloid(lmp), ThrOMP(lmp, PAIR)
+  PairColloid(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairColloidOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairColloidOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +81,11 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -204,7 +200,7 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h
index a0be13cbb4..cde7e9b650 100644
--- a/src/USER-OMP/pair_colloid_omp.h
+++ b/src/USER-OMP/pair_colloid_omp.h
@@ -39,7 +39,7 @@ class PairColloidOMP : public PairColloid, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp
index bb19db3d22..a8473eec38 100644
--- a/src/USER-OMP/pair_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) :
-  PairCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -86,12 +81,13 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -142,7 +138,7 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h
index eca9958ff2..3499ee4ae6 100644
--- a/src/USER-OMP/pair_coul_cut_omp.h
+++ b/src/USER-OMP/pair_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairCoulCutOMP : public PairCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp
index 1c2e7b8e07..73e579262e 100644
--- a/src/USER-OMP/pair_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_coul_debye_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) :
-  PairCoulDebye(lmp), ThrOMP(lmp, PAIR)
+  PairCoulDebye(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -86,12 +81,13 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -144,7 +140,7 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 	  ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h
index 7ad599bb1b..f016de8b5d 100644
--- a/src/USER-OMP/pair_coul_debye_omp.h
+++ b/src/USER-OMP/pair_coul_debye_omp.h
@@ -39,7 +39,7 @@ class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp
index 3a2e051591..82f070d37d 100644
--- a/src/USER-OMP/pair_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) :
-  PairCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -96,12 +91,13 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -181,7 +177,7 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h
index 7b63f762f2..d7655637d0 100644
--- a/src/USER-OMP/pair_coul_long_omp.h
+++ b/src/USER-OMP/pair_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairCoulLongOMP : public PairCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp
index 9ba93b19b5..85079dd718 100644
--- a/src/USER-OMP/pair_dipole_cut_omp.cpp
+++ b/src/USER-OMP/pair_dipole_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) :
-  PairDipoleCut(lmp), ThrOMP(lmp, PAIR)
+  PairDipoleCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairDipoleCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,34 @@ void PairDipoleCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
@@ -90,14 +83,16 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  double **mu = atom->mu;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const q = atom->q;
+  const double * const * const mu = atom->mu;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
   ilist = list->ilist;
@@ -265,7 +260,7 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h
index 832bd4d3be..b175450c9f 100644
--- a/src/USER-OMP/pair_dipole_cut_omp.h
+++ b/src/USER-OMP/pair_dipole_cut_omp.h
@@ -39,7 +39,7 @@ class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp
index 9ebc72d414..b920ff5c83 100644
--- a/src/USER-OMP/pair_dipole_sf_omp.cpp
+++ b/src/USER-OMP/pair_dipole_sf_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) :
-  PairDipoleSF(lmp), ThrOMP(lmp, PAIR)
+  PairDipoleSF(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairDipoleSFOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,34 @@ void PairDipoleSFOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
@@ -94,14 +87,16 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  double **mu = atom->mu;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const q = atom->q;
+  const double * const * const mu = atom->mu;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
   ilist = list->ilist;
@@ -297,7 +292,7 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h
index e601e2d569..89c80fa788 100644
--- a/src/USER-OMP/pair_dipole_sf_omp.h
+++ b/src/USER-OMP/pair_dipole_sf_omp.h
@@ -39,7 +39,7 @@ class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp
index be1e32f37d..0d24ce401d 100644
--- a/src/USER-OMP/pair_dpd_omp.cpp
+++ b/src/USER-OMP/pair_dpd_omp.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDPDOMP::PairDPDOMP(LAMMPS *lmp) :
-  PairDPD(lmp), ThrOMP(lmp, PAIR)
+  PairDPD(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   random_thr = NULL;
@@ -54,7 +54,6 @@ void PairDPDOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -63,46 +62,46 @@ void PairDPDOMP::compute(int eflag, int vflag)
 
   if (!random_thr)
     random_thr = new RanMars*[nthreads];
-  
+
+  // to ensure full compatibility with the serial DPD style
+  // we use its random number generator instance for thread 0
   random_thr[0] = random;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
+    // generate a random number generator instance for
+    // all threads != 0. make sure we use unique seeds.
     if (random_thr && tid > 0)
       random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
 				    + comm->nprocs*tid);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -112,14 +111,15 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *special_lj = force->special_lj;
+  const double dtinvsqrt = 1.0/sqrt(update->dt);
   double fxtmp,fytmp,fztmp;
-  RanMars &rng = *random_thr[tid];
+  RanMars &rng = *random_thr[thr->get_tid()];
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -190,7 +190,7 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h
index 9385e5444f..c3802f8e60 100644
--- a/src/USER-OMP/pair_dpd_omp.h
+++ b/src/USER-OMP/pair_dpd_omp.h
@@ -43,7 +43,7 @@ class PairDPDOMP : public PairDPD, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp
index 7e3fb8b398..50a1bf439e 100644
--- a/src/USER-OMP/pair_dpd_tstat_omp.cpp
+++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) :
-  PairDPDTstat(lmp), ThrOMP(lmp, PAIR)
+  PairDPDTstat(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   random_thr = NULL;
@@ -54,7 +54,6 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -64,45 +63,45 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
   if (!random_thr)
     random_thr = new RanMars*[nthreads];
   
+  // to ensure full compatibility with the serial DPD style
+  // we use its random number generator instance for thread 0
   random_thr[0] = random;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
+    // generate a random number generator instance for
+    // all threads != 0. make sure we use unique seeds.
     if (random_thr && tid > 0)
       random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
 				    + comm->nprocs*tid);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDTstatOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -112,14 +111,15 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *special_lj = force->special_lj;
+  const double dtinvsqrt = 1.0/sqrt(update->dt);
   double fxtmp,fytmp,fztmp;
-  RanMars &rng = *random_thr[tid];
+  RanMars &rng = *random_thr[thr->get_tid()];
 
   // adjust sigma if target T is changing
 
@@ -192,7 +192,7 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,0.0,fpair,delx,dely,delz,tid);
+				 0.0,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h
index 14f640a925..87c9de5505 100644
--- a/src/USER-OMP/pair_dpd_tstat_omp.h
+++ b/src/USER-OMP/pair_dpd_tstat_omp.h
@@ -43,7 +43,7 @@ class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp
index 0ae4d54fb7..c014eb75e2 100644
--- a/src/USER-OMP/pair_eam_omp.cpp
+++ b/src/USER-OMP/pair_eam_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEAMOMP::PairEAMOMP(LAMMPS *lmp) :
-  PairEAM(lmp), ThrOMP(lmp, PAIR)
+  PairEAM(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,7 +39,6 @@ void PairEAMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -58,42 +57,39 @@ void PairEAMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
     if (force->newton_pair)
-      rho_t = rho + tid*nall;
-    else rho_t = rho + tid*atom->nlocal;
+      thr->init_eam(nall, rho);
+    else
+      thr->init_eam(atom->nlocal, rho);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairEAMOMP::eval(double **f, double *rho_t,
-		      int iifrom, int iito, int tid)
+void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -103,10 +99,15 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
 
   double fxtmp,fytmp,fztmp;
 
@@ -114,11 +115,6 @@ void PairEAMOMP::eval(double **f, double *rho_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double));
-  else memset(rho_t, 0, nlocal*sizeof(double));
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -164,7 +160,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -178,7 +174,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
     sync_threads();
   
   } else {
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -198,8 +194,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
     fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
     if (EFLAG) {
       phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
     }
   }
 
@@ -283,7 +278,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
 	if (EFLAG) evdwl = phi;
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h
index 1184cb34bc..6b0f1274fc 100644
--- a/src/USER-OMP/pair_eam_omp.h
+++ b/src/USER-OMP/pair_eam_omp.h
@@ -39,7 +39,7 @@ class PairEAMOMP : public PairEAM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp
index 65b05c8143..f0d6d47cec 100644
--- a/src/USER-OMP/pair_edip_omp.cpp
+++ b/src/USER-OMP/pair_edip_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) :
-  PairEDIP(lmp), ThrOMP(lmp, PAIR)
+  PairEDIP(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairEDIPOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = vflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,35 +43,31 @@ void PairEDIPOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
-    } else eval<0,0,0>(f, ifrom, ito, tid);
+    } else eval<0,0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairEDIPOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
@@ -133,6 +128,8 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
   double potentia3B_factor;
   double potential2B_factor;
 
+  const int tid = thr->get_tid();
+
   double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList;
   double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList;
   double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList;
@@ -141,9 +138,10 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
   double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList;
   double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   inum = list->inum;
   ilist = list->ilist;
@@ -340,7 +338,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
       evdwl = (exp2B_ij * potential2B_factor);
 
       if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0,
-			       -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+			       -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
 
       // three-body Forces
 
@@ -435,7 +433,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
 
           evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor);
 
-          if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
+          if (evflag) ev_tally3_thr(this,i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik,thr);
       }
     }
 
@@ -469,7 +467,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
 
         evdwl = 0.0;
         if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0,
-				 forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+				 forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h
index 55c34db345..55e10c83bb 100644
--- a/src/USER-OMP/pair_edip_omp.h
+++ b/src/USER-OMP/pair_edip_omp.h
@@ -34,7 +34,7 @@ class PairEDIPOMP : public PairEDIP, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp
index d31ad20120..7184adb781 100644
--- a/src/USER-OMP/pair_eim_omp.cpp
+++ b/src/USER-OMP/pair_eim_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEIMOMP::PairEIMOMP(LAMMPS *lmp) :
-  PairEIM(lmp), ThrOMP(lmp, PAIR)
+  PairEIM(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,7 +39,6 @@ void PairEIMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -58,46 +57,39 @@ void PairEIMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, *fp_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      fp_t = fp + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      fp_t = fp + tid*atom->nlocal;
-    }
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair)
+      thr->init_eim(nall, rho, fp);
+    else
+      thr->init_eim(atom->nlocal, rho, fp);
     
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
-		      int iifrom, int iito, int tid)
+void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -107,10 +99,17 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  double * const fp_t = thr->get_fp();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
 
   double fxtmp,fytmp,fztmp;
 
@@ -118,16 +117,6 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density and fp
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(fp_t, 0, nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(fp_t, 0, nlocal*sizeof(double));
-  }
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -171,7 +160,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   // communicate and sum densities
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -185,7 +174,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     }
 
   } else {
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -243,7 +232,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   // communicate and sum modified densities
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(fp, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -257,7 +246,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     }
 
   } else {
-    data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(fp, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -279,8 +268,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     itype = type[i];
     if (EFLAG) {
       phi = 0.5*rho[i]*fp[i];
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
     }
   }
 
@@ -345,7 +333,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
 
 	if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul;
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h
index 3693492e09..ad273e28eb 100644
--- a/src/USER-OMP/pair_eim_omp.h
+++ b/src/USER-OMP/pair_eim_omp.h
@@ -39,7 +39,7 @@ class PairEIMOMP : public PairEIM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp
index e8b255d0b7..4f26670715 100644
--- a/src/USER-OMP/pair_gauss_omp.cpp
+++ b/src/USER-OMP/pair_gauss_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGaussOMP::PairGaussOMP(LAMMPS *lmp) :
-  PairGauss(lmp), ThrOMP(lmp, PAIR)
+  PairGauss(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,46 +37,44 @@ void PairGaussOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
+  double occ = 0.0;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr);
+	else occ = eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr);
+	else occ = eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr);
+      else occ = eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
+  if (eflag_global) pvector[0] = occ;
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
+double PairGaussOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +84,11 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -149,14 +148,14 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
-  if (eflag_global) pvector[0] = occ;
+  return occ;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h
index 7f8fc9a85b..81d9d0ce3f 100644
--- a/src/USER-OMP/pair_gauss_omp.h
+++ b/src/USER-OMP/pair_gauss_omp.h
@@ -39,7 +39,7 @@ class PairGaussOMP : public PairGauss, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  double eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp
index ff115e8ef7..d8ec6c9b32 100644
--- a/src/USER-OMP/pair_gayberne_omp.cpp
+++ b/src/USER-OMP/pair_gayberne_omp.cpp
@@ -27,7 +27,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) :
-  PairGayBerne(lmp), ThrOMP(lmp, PAIR)
+  PairGayBerne(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -38,7 +38,6 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -46,40 +45,34 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+void PairGayBerneOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
@@ -88,11 +81,13 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
   int *ilist,*jlist,*numneigh,**firstneigh;
   double *iquat,*jquat;
 
-  double **x = atom->x;
-  int *ellipsoid = atom->ellipsoid;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const tor = thr->get_torque();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  const int * const ellipsoid = atom->ellipsoid;
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
 
@@ -108,6 +103,7 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 
     i = ilist[ii];
     itype = type[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
 
     if (form[itype][itype] == ELLIPSE_ELLIPSE) {
       iquat = bonus[ellipsoid[i]].quat;
@@ -187,12 +183,12 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 	ttor[1] *= factor_lj;
 	ttor[2] *= factor_lj;
 
-        f[i][0] += fforce[0];
-	f[i][1] += fforce[1];
-	f[i][2] += fforce[2];
-        tor[i][0] += ttor[0];
-	tor[i][1] += ttor[1];
-	tor[i][2] += ttor[2];
+        fxtmp += fforce[0];
+	fytmp += fforce[1];
+	fztmp += fforce[2];
+        t1tmp += ttor[0];
+	t2tmp += ttor[1];
+	t3tmp += ttor[2];
 
         if (NEWTON_PAIR || j < nlocal) {
           rtor[0] *= factor_lj;
@@ -210,9 +206,15 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
 				     evdwl,0.0,fforce[0],fforce[1],fforce[2],
-				     -r12[0],-r12[1],-r12[2],tid);
+				     -r12[0],-r12[1],-r12[2],thr);
       }
     }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    tor[i][0] += t1tmp;
+    tor[i][1] += t2tmp;
+    tor[i][2] += t3tmp;
   }
 }
 
diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h
index 737b4ec67d..0bd0b8b086 100644
--- a/src/USER-OMP/pair_gayberne_omp.h
+++ b/src/USER-OMP/pair_gayberne_omp.h
@@ -39,7 +39,7 @@ class PairGayBerneOMP : public PairGayBerne, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
index 1866833afe..23b8b8f5c2 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) :
-  PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR)
+  PairGranHertzHistory(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
@@ -47,35 +46,29 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
-      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<1,1>(ifrom, ito, thr);
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
   laststep = update->ntimestep;
 }
 
 template <int EVFLAG, int SHEARUPDATE>
-void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
@@ -90,15 +83,17 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int
   int *touch,**firsttouch;
   double *shear,*allshear,**firstshear;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -274,7 +269,7 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);
 
       }
     }
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h
index 66d7bc0fa5..956e057093 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.h
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.h
@@ -39,7 +39,7 @@ class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP {
 
  private:
   template <int EVFLAG, int SHEARUPDATE>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
index ad0537b516..5212b30ce2 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) :
-  PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR)
+  PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   // trigger use of OpenMP version of FixShearHistory
@@ -42,7 +42,6 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
@@ -52,38 +51,33 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
-      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<1,1>(ifrom, ito, thr);
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
   laststep = update->ntimestep;
 }
 
 template <int EVFLAG, int SHEARUPDATE>
-void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
+  double myshear[3];
   double radi,radj,radsum,rsq,r,rinv,rsqinv;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
   double wr1,wr2,wr3;
@@ -95,15 +89,17 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
   int *touch,**firsttouch;
   double *shear,*allshear,**firstshear;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -144,10 +140,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	// unset non-touching neighbors
 
         touch[jj] = 0;
-	shear = &allshear[3*jj];
-        shear[0] = 0.0;
-        shear[1] = 0.0;
-        shear[2] = 0.0;
+        myshear[0] = 0.0;
+        myshear[1] = 0.0;
+        myshear[2] = 0.0;
 
       } else {
 	r = sqrt(rsq);
@@ -186,7 +181,6 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	  if (mask[i] & freeze_group_bit) meff = rmass[j];
 	  if (mask[j] & freeze_group_bit) meff = rmass[i];
 	} else {
-	  itype = type[i];
 	  jtype = type[j];
 	  meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]);
 	  if (mask[i] & freeze_group_bit) meff = mass[jtype];
@@ -207,31 +201,31 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	// shear history effects
 
 	touch[jj] = 1;
-	shear = &allshear[3*jj];
+	memcpy(myshear,allshear + 3*jj, 3*sizeof(double));
 
 	if (SHEARUPDATE) {
-	  shear[0] += vtr1*dt;
-	  shear[1] += vtr2*dt;
-	  shear[2] += vtr3*dt;
+	  myshear[0] += vtr1*dt;
+	  myshear[1] += vtr2*dt;
+	  myshear[2] += vtr3*dt;
 	}
-        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
-		      shear[2]*shear[2]);
+        shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] +
+		      myshear[2]*myshear[2]);
 
 	// rotate shear displacements
 
-	rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+	rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz;
 	rsht *= rsqinv;
 	if (SHEARUPDATE) {
-	  shear[0] -= rsht*delx;
-	  shear[1] -= rsht*dely;
-	  shear[2] -= rsht*delz;
+	  myshear[0] -= rsht*delx;
+	  myshear[1] -= rsht*dely;
+	  myshear[2] -= rsht*delz;
 	}
 
 	// tangential forces = shear + tangential velocity damping
 
-	fs1 = - (kt*shear[0] + meff*gammat*vtr1);
-	fs2 = - (kt*shear[1] + meff*gammat*vtr2);
-	fs3 = - (kt*shear[2] + meff*gammat*vtr3);
+	fs1 = - (kt*myshear[0] + meff*gammat*vtr1);
+	fs2 = - (kt*myshear[1] + meff*gammat*vtr2);
+	fs3 = - (kt*myshear[2] + meff*gammat*vtr3);
 
 	// rescale frictional displacements and forces if needed
 
@@ -242,9 +236,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	  if (shrmag != 0.0) {
 	    const double fnfs = fn/fs;
 	    const double mgkt = meff*gammat/kt;
-	    shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1;
-	    shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2;
-	    shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3;
+	    myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1;
+	    myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2;
+	    myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3;
 	    fs1 *= fnfs;
 	    fs2 *= fnfs;
 	    fs3 *= fnfs;
@@ -277,9 +271,10 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);
 
       }
+      memcpy(allshear + 3*jj, myshear, 3*sizeof(double));
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h
index 33325025fc..7588469e74 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.h
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.h
@@ -39,7 +39,7 @@ class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP {
 
  private:
   template <int EVFLAG, int SHEARUPDATE>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp
index d6991fa453..fda9295b70 100644
--- a/src/USER-OMP/pair_gran_hooke_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) :
-  PairGranHooke(lmp), ThrOMP(lmp, PAIR)
+  PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,33 +43,28 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag)
-      if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<1,1>(ifrom, ito, thr);
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int NEWTON_PAIR>
-void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
@@ -82,15 +76,17 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i
   double fn,fs,ft,fs1,fs2,fs3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -216,7 +212,7 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);
 
       }
     }
diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h
index f2b093778c..b275992bfa 100644
--- a/src/USER-OMP/pair_gran_hooke_omp.h
+++ b/src/USER-OMP/pair_gran_hooke_omp.h
@@ -39,7 +39,7 @@ class PairGranHookeOMP : public PairGranHooke, public ThrOMP {
 
  private:
   template <int EVFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
index 012fd596b3..5da3f2bdfa 100644
--- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
@@ -31,7 +31,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) :
-  PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR)
+  PairHbondDreidingLJ(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   hbcount_thr = hbeng_thr = NULL;
@@ -54,7 +54,6 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -72,35 +71,31 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
-
   // reduce per thread hbond data
   if (eflag_global) {
     pvector[0] = 0.0;
@@ -113,25 +108,26 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,evdwl,eng_lj;
   double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r2inv,r10inv;
   double switch1,switch2;
-  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  int *ilist,*jlist,*numneigh,**firstneigh;
   Param *pm;
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int **special = atom->special;
-  int **nspecial = atom->nspecial;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const double * const special_lj = force->special_lj;
+  const int * const * const nspecial = atom->nspecial;
+  const int * const * const special = atom->special;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -152,8 +148,8 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
     itype = type[i];
     if (!donor[itype]) continue;
 
-    klist = special[i];
-    knum = nspecial[i][0];
+    const int * const klist = special[i];
+    const int knum = nspecial[i][0];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
@@ -270,7 +266,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
 
 	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
-	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr);
 	    if (EFLAG) {
 	      hbcount++;
 	      hbeng += evdwl;
@@ -283,6 +279,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
+  const int tid = thr->get_tid();
   hbcount_thr[tid] = static_cast<double>(hbcount);
   hbeng_thr[tid] = hbeng;
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
index 1aef78490c..9373916849 100644
--- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
@@ -43,7 +43,7 @@ class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
index b6c966f8c7..bce4efdd3a 100644
--- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
@@ -31,7 +31,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) :
-  PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR)
+  PairHbondDreidingMorse(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   hbcount_thr = hbeng_thr = NULL;
@@ -54,7 +54,6 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -72,35 +71,31 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
-
   // reduce per thread hbond data
   if (eflag_global) {
     pvector[0] = 0.0;
@@ -113,24 +108,25 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,evdwl;
   double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r,dr,dexp,eng_morse,switch1,switch2;
-  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  int *ilist,*jlist,*numneigh,**firstneigh;
   Param *pm;
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int **special = atom->special;
-  int **nspecial = atom->nspecial;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const double * const special_lj = force->special_lj;
+  const int * const * const nspecial = atom->nspecial;
+  const int * const * const special = atom->special;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -151,8 +147,8 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
     itype = type[i];
     if (!donor[itype]) continue;
 
-    klist = special[i];
-    knum = nspecial[i][0];
+    const int * const klist = special[i];
+    const int knum = nspecial[i][0];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
@@ -268,7 +264,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 
 	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
-	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr);
 	    if (EFLAG) {
 	      hbcount++;
 	      hbeng += evdwl;
@@ -281,6 +277,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
+  const int tid = thr->get_tid();
   hbcount_thr[tid] = static_cast<double>(hbcount);
   hbeng_thr[tid] = hbeng;
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
index 2a13c618c6..d2edd7281b 100644
--- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
@@ -43,7 +43,7 @@ class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp
index f0998363e1..68733c1093 100644
--- a/src/USER-OMP/pair_lj96_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj96_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) :
-  PairLJ96Cut(lmp), ThrOMP(lmp, PAIR)
+  PairLJ96Cut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJ96CutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h
index 333212303d..a8040320c8 100644
--- a/src/USER-OMP/pair_lj96_cut_omp.h
+++ b/src/USER-OMP/pair_lj96_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
index 32ad05acda..edfbe1f527 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmImplicitOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
-  double invdenom_coul,invdenom_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
-  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
+  const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
+  const double invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
 
   // loop over neighbors of my atoms
 
@@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, i
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
index ba016d7d3d..dff01ce499 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit,
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
index 6dac7a17f6..efdcc995da 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
-  double invdenom_coul,invdenom_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
-  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
+  const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
+  const double invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
 
   // loop over neighbors of my atoms
 
@@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
index f2889b05fe..0eda030ebd 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
index c99f27f2e1..f9f32ea119 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -97,13 +92,14 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -214,7 +210,7 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
index b14e4c1fe4..91b9c01c1a 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
index 0321882793..e54c348e64 100644
--- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) :
-  PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2CoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -163,9 +159,9 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
-	
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
index 5fe4895691..b22a29aa18 100644
--- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
index 84d26ceb14..20ad947d23 100644
--- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) :
-  PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,13 +90,14 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -181,7 +177,7 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
index da4ac3680f..b32799bf84 100644
--- a/src/USER-OMP/pair_lj_class2_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp
index 4f5d2550fc..cff80d3f1d 100644
--- a/src/USER-OMP/pair_lj_class2_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) :
-  PairLJClass2(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2OMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h
index cfe24bb714..317c7376c5 100644
--- a/src/USER-OMP/pair_lj_class2_omp.h
+++ b/src/USER-OMP/pair_lj_class2_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2OMP : public PairLJClass2, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp
index 23e2a8d906..ae15087ba9 100644
--- a/src/USER-OMP/pair_lj_coul_omp.cpp
+++ b/src/USER-OMP/pair_lj_coul_omp.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) :
-  PairLJCoul(lmp), ThrOMP(lmp, PAIR)
+  PairLJCoul(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -45,7 +45,6 @@ void PairLJCoulOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -53,53 +52,50 @@ void PairLJCoulOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCoulOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
 
-  double *x0 = x[0];
+  const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
   int *ilist = list->ilist;
@@ -127,7 +123,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       ni = sbmask(j);
       j &= NEIGHMASK;
       
-      { register double *xj = x0+(j+(j<<1));
+      { register const double *xj = x0+(j+(j<<1));
 	d[0] = xi[0] - xj[0];				// pair vector
 	d[1] = xi[1] - xj[1];
 	d[2] = xi[2] - xj[2]; }
@@ -218,7 +214,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       }
       
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h
index 619e609ba8..e2259e16a0 100644
--- a/src/USER-OMP/pair_lj_coul_omp.h
+++ b/src/USER-OMP/pair_lj_coul_omp.h
@@ -39,7 +39,7 @@ class PairLJCoulOMP : public PairLJCoul, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp
index 4f806bd71f..09e44a9107 100644
--- a/src/USER-OMP/pair_lj_cubic_omp.cpp
+++ b/src/USER-OMP/pair_lj_cubic_omp.cpp
@@ -26,7 +26,7 @@ using namespace PairLJCubicConstants;
 /* ---------------------------------------------------------------------- */
 
 PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) :
-  PairLJCubic(lmp), ThrOMP(lmp, PAIR)
+  PairLJCubic(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCubicOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +81,11 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -152,8 +148,8 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h
index 559a6125ab..a6ed7d2b97 100644
--- a/src/USER-OMP/pair_lj_cubic_omp.h
+++ b/src/USER-OMP/pair_lj_cubic_omp.h
@@ -39,7 +39,7 @@ class PairLJCubicOMP : public PairLJCubic, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
index be98ec38fc..46114ce613 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) :
-  PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -86,13 +81,14 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -159,11 +155,11 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
 	      offset[itype][jtype];
 	    evdwl *= factor_lj;
-	  }
-	} else evdwl = 0.0;
+	  } else evdwl = 0.0;
+	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
index c8c34e2591..3d4be420e7 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
index 13a4a1906f..9d96f31dba 100644
--- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) :
-  PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulDebye(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -129,7 +125,6 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 
-
 	if (rsq < cut_coulsq[itype][jtype]) {
 	  r = sqrt(rsq);
 	  rinv = 1.0/r;
@@ -165,8 +160,9 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
index 00cf540be2..e2205cb7ce 100644
--- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
index 1d8f977c96..79976bf8a8 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) :
-  PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -96,13 +91,14 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -198,9 +194,9 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
-	
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
index ac408ba886..a907959ae3 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
index 6ada944c53..78f35709a2 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
@@ -36,7 +36,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) :
-  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 
@@ -61,7 +61,6 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nlocal = atom->nlocal;
@@ -76,8 +75,8 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
   }
 
   // cache corrected M positions in mpos[]
-  double **x = atom->x;
-  int *type = atom->type;
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
   for (int i = 0; i < nlocal; i++) {
     if (type[i] == typeO) {
       find_M(i,h1idx[i],h2idx[i],mpos[i]);
@@ -101,39 +100,35 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      eval<0,0,0>(f, ifrom, ito, tid);
+      eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int VFLAG>
-void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulLongTIP4POMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   int n,vlist[6];
@@ -151,13 +146,14 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -216,7 +212,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 	  } else evdwl = 0.0;
 
 	  if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
-				   evdwl,0.0,forcelj,delx,dely,delz,tid);
+				   evdwl,0.0,forcelj,delx,dely,delz,thr);
 	}
 
 	// adjust rsq and delxyz for off-site O charge(s)
@@ -423,7 +419,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
 	  } else ecoul = 0.0;
 
-	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid);
+	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr);
 	}
       }
     }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
index 093fc0216b..ff49bdcedb 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
@@ -39,7 +39,6 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
   virtual double memory_usage();
 
  protected:
-
   // this is to cache m-shift corrected positions.
   int maxmpos;        // size of the following arrays
   int *h1idx, *h2idx; // local index of hydrogen atoms
@@ -48,7 +47,7 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp
index 3d82149fec..4932a784bb 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -139,8 +135,8 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_omp.h b/src/USER-OMP/pair_lj_cut_omp.h
index 56f9f9b8a5..f97996e480 100644
--- a/src/USER-OMP/pair_lj_cut_omp.h
+++ b/src/USER-OMP/pair_lj_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJCutOMP : public PairLJCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp
index 7b06503ee4..4f93d3bd42 100644
--- a/src/USER-OMP/pair_lj_expand_omp.cpp
+++ b/src/USER-OMP/pair_lj_expand_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) :
-  PairLJExpand(lmp), ThrOMP(lmp, PAIR)
+  PairLJExpand(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJExpandOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -143,8 +139,8 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h
index 29488deae8..9ff8d3080a 100644
--- a/src/USER-OMP/pair_lj_expand_omp.h
+++ b/src/USER-OMP/pair_lj_expand_omp.h
@@ -39,7 +39,7 @@ class PairLJExpandOMP : public PairLJExpand, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
index 2e97fa1b5e..ca8875c7f8 100644
--- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) :
-  PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR)
+  PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsCoulGromacsOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -190,7 +186,7 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
index d789bd6797..ee506c2c4a 100644
--- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
@@ -39,7 +39,7 @@ class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrO
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp
index f1c7d2faf9..abdc4c5ccf 100644
--- a/src/USER-OMP/pair_lj_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) :
-  PairLJGromacs(lmp), ThrOMP(lmp, PAIR)
+  PairLJGromacs(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -151,8 +147,8 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h
index d192a414ef..8e0f4bd281 100644
--- a/src/USER-OMP/pair_lj_gromacs_omp.h
+++ b/src/USER-OMP/pair_lj_gromacs_omp.h
@@ -39,7 +39,7 @@ class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp
index 55ee908e47..47cc23bf91 100644
--- a/src/USER-OMP/pair_lj_sf_omp.cpp
+++ b/src/USER-OMP/pair_lj_sf_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) :
-  PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR)
+  PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -142,8 +138,8 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h
index 6fba43fb8f..c73c8f746b 100644
--- a/src/USER-OMP/pair_lj_sf_omp.h
+++ b/src/USER-OMP/pair_lj_sf_omp.h
@@ -39,7 +39,7 @@ class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp
index 1ad88044a6..4bf9ceb41c 100644
--- a/src/USER-OMP/pair_lj_smooth_omp.cpp
+++ b/src/USER-OMP/pair_lj_smooth_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) :
-  PairLJSmooth(lmp), ThrOMP(lmp, PAIR)
+  PairLJSmooth(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJSmoothOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -155,8 +151,8 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h
index de27a4008d..eb6eb92dec 100644
--- a/src/USER-OMP/pair_lj_smooth_omp.h
+++ b/src/USER-OMP/pair_lj_smooth_omp.h
@@ -39,7 +39,7 @@ class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp
index a53e35a977..f61fd4e383 100644
--- a/src/USER-OMP/pair_morse_omp.cpp
+++ b/src/USER-OMP/pair_morse_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairMorseOMP::PairMorseOMP(LAMMPS *lmp) :
-  PairMorse(lmp), ThrOMP(lmp, PAIR)
+  PairMorse(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairMorseOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairMorseOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairMorseOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -139,8 +135,8 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h
index a966e6f11f..a20aad6716 100644
--- a/src/USER-OMP/pair_morse_omp.h
+++ b/src/USER-OMP/pair_morse_omp.h
@@ -39,7 +39,7 @@ class PairMorseOMP : public PairMorse, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp
index 7cb1e83086..e052271e4f 100644
--- a/src/USER-OMP/pair_peri_lps_omp.cpp
+++ b/src/USER-OMP/pair_peri_lps_omp.cpp
@@ -26,15 +26,18 @@
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
+#include "math_const.h"
 
 using namespace LAMMPS_NS;
+using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) :
-  PairPeriLPS(lmp), ThrOMP(lmp, PAIR)
+  PairPeriLPS(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
+  fix_name = "PERI_NEIGH_OMP";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -43,7 +46,6 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -61,38 +63,34 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairPeriLPSOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
@@ -103,9 +101,10 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
 
   double *vfrac = atom->vfrac;
@@ -151,7 +150,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
- 
+
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
@@ -182,7 +181,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 	// of the bond-based theory used in PMB model
 
         double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
-	  (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
+	  (MY_PI * cutsq[itype][jtype] * cutsq[itype][jtype]);
         rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
 
         if (r > 0.0) fpair = -(rk/r);
@@ -199,7 +198,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 
         if (EFLAG) evdwl = 0.5*rk*dr;
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				 fpair*vfrac[i],delx,dely,delz,tid);
+				 fpair*vfrac[i],delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -214,7 +213,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 #if defined(_OPENMP)
   // each thread works on a fixed chunk of atoms.
   const int idelta = 1 + nlocal/comm->nthreads;
-  iifrom = tid*idelta;
+  iifrom = thr->get_tid()*idelta;
   iito   = iifrom + idelta;
   if (iito > nlocal)
     iito = nlocal;
@@ -234,7 +233,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 #endif
   { // communicate dilatation (theta) of each particle	
     comm->forward_comm_pair(this);
-    // communicate wighted volume (wvolume) upon every reneighbor
+    // communicate weighted volume (wvolume) upon every reneighbor
     if (neighbor->ago == 0)
       comm->forward_comm_fix(modify->fix[ifix_peri]);
   }
@@ -245,10 +244,8 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
   if (EFLAG) {
     for (i = iifrom; i < iito; i++) {   
       itype = type[i];
-      if (eflag_global)
-	eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
-      if (eflag_atom)
-	eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR,
+		  0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]), 0.0, thr);
     }
   }
 
@@ -332,7 +329,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 		   omega_plus*(deviatoric_extension * deviatoric_extension) *
 		   vfrac[j] * vfrac_scale;
       if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
-			       0.5*fbond*vfrac[i],delx,dely,delz,tid);
+			       0.5*fbond*vfrac[i],delx,dely,delz,thr);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h
index 2068830ca0..f234a41098 100644
--- a/src/USER-OMP/pair_peri_lps_omp.h
+++ b/src/USER-OMP/pair_peri_lps_omp.h
@@ -43,7 +43,7 @@ class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp
index 4e46d142d9..96e991bab6 100644
--- a/src/USER-OMP/pair_peri_pmb_omp.cpp
+++ b/src/USER-OMP/pair_peri_pmb_omp.cpp
@@ -32,9 +32,10 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) :
-  PairPeriPMB(lmp), ThrOMP(lmp, PAIR)
+  PairPeriPMB(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
+  fix_name = "PERI_NEIGH_OMP";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -43,7 +44,6 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -59,38 +59,34 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairPeriPMBOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
@@ -101,9 +97,10 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
 
   double *vfrac = atom->vfrac;
@@ -148,10 +145,11 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
- 
+
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
+
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
@@ -190,7 +188,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 
         if (EFLAG) evdwl = 0.5*rk*dr;
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				 fpair*vfrac[i],delx,dely,delz,tid);
+				 fpair*vfrac[i],delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -205,7 +203,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 #if defined(_OPENMP)
   // each thread works on a fixed chunk of atoms.
   const int idelta = 1 + nlocal/comm->nthreads;
-  iifrom = tid*idelta;
+  iifrom = thr->get_tid()*idelta;
   iito   = iifrom + idelta;
   if (iito > nlocal)
     iito = nlocal;
@@ -278,7 +276,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
       if (EFLAG) evdwl = 0.5*rk*dr;
       if (EVFLAG) 
 	ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
-		     0.5*fbond*vfrac[i],delx,dely,delz,tid);
+		     0.5*fbond*vfrac[i],delx,dely,delz,thr);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
@@ -291,13 +289,14 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
+
       first = false;
     }
   }
 
   sync_threads();
 
-  // store new s0
+  // store new s0 (in parallel)
   for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; 
 }
 
diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h
index 9940e5ed15..8a7fc091d9 100644
--- a/src/USER-OMP/pair_peri_pmb_omp.h
+++ b/src/USER-OMP/pair_peri_pmb_omp.h
@@ -39,7 +39,7 @@ class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp
index 4870553050..cef5aaefc5 100644
--- a/src/USER-OMP/pair_resquared_omp.cpp
+++ b/src/USER-OMP/pair_resquared_omp.cpp
@@ -27,7 +27,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) :
-  PairRESquared(lmp), ThrOMP(lmp, PAIR)
+  PairRESquared(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -38,7 +38,6 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -46,40 +45,34 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+void PairRESquaredOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
@@ -87,11 +80,12 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
   int *ilist,*jlist,*numneigh,**firstneigh;
   RE2Vars wi,wj;
 
-  double **x = atom->x;
-  int *ellipsoid = atom->ellipsoid;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const tor = thr->get_torque();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
 
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
@@ -105,6 +99,7 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
     i = ilist[ii];
     itype = type[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
 
     // not a LJ sphere
 
@@ -129,6 +124,8 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
+	fforce[0] = fforce[1] = fforce[2] = 0.0;
+
         switch (form[itype][jtype]) {
 
          case SPHERE_SPHERE:
@@ -157,17 +154,17 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
          case ELLIPSE_SPHERE:
           one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
+          t1tmp += ttor[0]*factor_lj;
+          t2tmp += ttor[1]*factor_lj;
+          t3tmp += ttor[2]*factor_lj;
           break;
 
          default:
           precompute_i(j,wj);
           one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
+          t1tmp += ttor[0]*factor_lj;
+          t2tmp += ttor[1]*factor_lj;
+          t3tmp += ttor[2]*factor_lj;
           if (NEWTON_PAIR || j < nlocal) {
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
@@ -179,9 +176,9 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
-        f[i][0] += fforce[0];
-        f[i][1] += fforce[1];
-        f[i][2] += fforce[2];
+	fxtmp += fforce[0];
+	fytmp += fforce[1];
+	fztmp += fforce[2];
 
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= fforce[0];
@@ -193,9 +190,15 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
 				     evdwl,0.0,fforce[0],fforce[1],fforce[2],
-				     -r12[0],-r12[1],-r12[2],tid);
+				     -r12[0],-r12[1],-r12[2],thr);
       }
     }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    tor[i][0] += t1tmp;
+    tor[i][1] += t2tmp;
+    tor[i][2] += t3tmp;
   }
 }
 
diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h
index 2a50bb6dd0..53a6e2e28f 100644
--- a/src/USER-OMP/pair_resquared_omp.h
+++ b/src/USER-OMP/pair_resquared_omp.h
@@ -39,7 +39,7 @@ class PairRESquaredOMP : public PairRESquared, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp
index 9f9673a28b..cbc1c9f7fd 100644
--- a/src/USER-OMP/pair_soft_omp.cpp
+++ b/src/USER-OMP/pair_soft_omp.cpp
@@ -29,7 +29,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairSoftOMP::PairSoftOMP(LAMMPS *lmp) :
-  PairSoft(lmp), ThrOMP(lmp, PAIR)
+  PairSoft(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -40,7 +40,6 @@ void PairSoftOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -48,38 +47,34 @@ void PairSoftOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSoftOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -88,10 +83,11 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 	if (EFLAG)
 	  evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r));
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h
index 840d874601..1698089521 100644
--- a/src/USER-OMP/pair_soft_omp.h
+++ b/src/USER-OMP/pair_soft_omp.h
@@ -39,7 +39,7 @@ class PairSoftOMP : public PairSoft, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp
index 5d7f1a60d7..12aceed1d4 100644
--- a/src/USER-OMP/pair_sw_omp.cpp
+++ b/src/USER-OMP/pair_sw_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairSWOMP::PairSWOMP(LAMMPS *lmp) :
-  PairSW(lmp), ThrOMP(lmp, PAIR)
+  PairSW(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairSWOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,33 +43,29 @@ void PairSWOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	eval<1,1>(f, ifrom, ito, tid);
+	eval<1,1>(ifrom, ito, thr);
       } else {
-	eval<1,0>(f, ifrom, ito, tid);
+	eval<1,0>(ifrom, ito, thr);
       }
-    } else eval<0,0>(f, ifrom, ito, tid);
+    } else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG>
-void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSWOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
@@ -81,10 +76,11 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *tag = atom->tag;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -92,7 +88,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 
   double fxtmp,fytmp,fztmp;
 
-  // loop over neighbors of my atoms
+  // loop over full neighbor list of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
@@ -144,7 +140,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
       f[j][2] -= delz*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
 
     jnumm1 = jnum - 1;
@@ -189,7 +185,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 	f[k][1] += fk[1];
 	f[k][2] += fk[2];
 
-	if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid);
+	if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,thr);
       }
       f[j][0] += fjxtmp;
       f[j][1] += fjytmp;
diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h
index 40052d7d41..c4af86007a 100644
--- a/src/USER-OMP/pair_sw_omp.h
+++ b/src/USER-OMP/pair_sw_omp.h
@@ -39,7 +39,7 @@ class PairSWOMP : public PairSW, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp
index 6b14d4c981..e8d63e590d 100644
--- a/src/USER-OMP/pair_table_omp.cpp
+++ b/src/USER-OMP/pair_table_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairTableOMP::PairTableOMP(LAMMPS *lmp) :
-  PairTable(lmp), ThrOMP(lmp, PAIR)
+  PairTable(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairTableOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairTableOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTableOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -89,10 +84,11 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -122,7 +118,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
-      
+
       if (rsq < cutsq[itype][jtype]) {
 	tb = &tables[tabindex[itype][jtype]];
 	if (rsq < tb->innersq)
@@ -181,7 +177,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
 
diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h
index 6fd1ce74a4..974149b9ac 100644
--- a/src/USER-OMP/pair_table_omp.h
+++ b/src/USER-OMP/pair_table_omp.h
@@ -39,7 +39,7 @@ class PairTableOMP : public PairTable, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp
index f59a8488f7..fdbcd48292 100644
--- a/src/USER-OMP/pair_tersoff_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) :
-  PairTersoff(lmp), ThrOMP(lmp, PAIR)
+  PairTersoff(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairTersoffOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = vflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,35 +43,31 @@ void PairTersoffOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
-    } else eval<0,0,0>(f, ifrom, ito, tid);
+    } else eval<0,0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTersoffOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,jj,kk,jnum;
   int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
@@ -84,10 +79,11 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *tag = atom->tag;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -147,7 +143,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
       f[j][2] -= delz*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
 
     // three-body interactions
@@ -199,7 +195,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
       fjztmp -= delr1[2]*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
-			       -fpair,-delr1[0],-delr1[1],-delr1[2],tid);
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],thr);
 
       // attractive term via loop over k
 
@@ -229,7 +225,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
 	f[k][1] += fk[1];
 	f[k][2] += fk[2];
 
-	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid);
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
       }
       f[j][0] += fjxtmp;
       f[j][1] += fjytmp;
diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h
index 5e5dc066d2..97c20548af 100644
--- a/src/USER-OMP/pair_tersoff_omp.h
+++ b/src/USER-OMP/pair_tersoff_omp.h
@@ -34,7 +34,7 @@ class PairTersoffOMP : public PairTersoff, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
index 710ad9df18..6caa13ee90 100644
--- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) :
-  PairYukawaColloid(lmp), ThrOMP(lmp, PAIR)
+  PairYukawaColloid(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaColloidOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
@@ -84,11 +79,12 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -144,7 +140,7 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor;
 	}
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h
index 9483cd15c1..c424e9eff3 100644
--- a/src/USER-OMP/pair_yukawa_colloid_omp.h
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.h
@@ -39,7 +39,7 @@ class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp
index 1380e2239c..210c7fcc1e 100644
--- a/src/USER-OMP/pair_yukawa_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) :
-  PairYukawa(lmp), ThrOMP(lmp, PAIR)
+  PairYukawa(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairYukawaOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairYukawaOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h
index e363ac6d17..99abc569fa 100644
--- a/src/USER-OMP/pair_yukawa_omp.h
+++ b/src/USER-OMP/pair_yukawa_omp.h
@@ -39,7 +39,7 @@ class PairYukawaOMP : public PairYukawa, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index 37ce1f198b..19537868e5 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -16,213 +16,481 @@
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
-#include "thr_omp.h"
-
-#include "memory.h"
-
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neighbor.h"
+
+#include "thr_omp.h"
 
 #include "pair.h"
+#include "bond.h"
+#include "angle.h"
 #include "dihedral.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
+#include "improper.h"
+#include "kspace.h"
 
 #include "math_const.h"
 
+#include <string.h>
+
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
-ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr)
+ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style)
 {
-  // initialize fixed size per thread storage
-  eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL;
-  virial_thr = NULL;
-
-  lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr");
-  lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr");
-  lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr");
-  lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr");
-
-  // variable size per thread, per atom storage
-  // the actually allocation happens via memory->grow() in ev_steup_thr()
-  maxeatom_thr = maxvatom_thr = 0;
-  evflag_global = evflag_atom = 0;
-  eatom_thr = NULL;
-  vatom_thr = NULL;
+  // look up the fix named "package_omp" and keep a pointer to it in 'fix'; report an error if it does not exist
+  int ifix = lmp->modify->find_fix("package_omp");
+  if (ifix < 0)
+    lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles");
+  fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::~ThrOMP()
 {
-  lmp->memory->destroy(eng_vdwl_thr);
-  lmp->memory->destroy(eng_coul_thr);
-  lmp->memory->destroy(eng_bond_thr);
-  lmp->memory->destroy(virial_thr);
-  lmp->memory->destroy(eatom_thr);
-  lmp->memory->destroy(vatom_thr);
+  // nothing to release: the constructor only stores pointers and allocates no storage
 }
 
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Hook up per thread per atom arrays into the tally infrastructure
+   ---------------------------------------------------------------------- */
 
-void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global,
-			     int eflag_atom, int vflag_atom, int nthreads)
+void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom,
+			  double **vatom, ThrData *thr)
 {
-  int t,i;
-
-  evflag_global = (eflag_global || vflag_global);
-  evflag_atom = (eflag_atom || vflag_atom);
+  const int tid = thr->get_tid(); // index of this thread's nall-sized slice of eatom/vatom
   
-  for (t = 0; t < nthreads; ++t) {
+  if (thr_style & THR_PAIR) {
+    if (eflag & 2) { // per-atom energy slice: hook up, then clear
+      thr->eatom_pair = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_pair[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) { // per-atom virial slice (6 values per atom): hook up, then clear
+      thr->vatom_pair = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); // [0][0] reads row 0: skip if no atoms
+    }
+  }
+
+  if (thr_style & THR_BOND) {
+    if (eflag & 2) {
+      thr->eatom_bond = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_bond[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_bond = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (eflag_global) 
-      eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0;
+  if (thr_style & THR_ANGLE) {
+    if (eflag & 2) {
+      thr->eatom_angle = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_angle[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_angle = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (vflag_global) 
-      for (i = 0; i < 6; ++i)
-	virial_thr[t][i] = 0.0;
+  if (thr_style & THR_DIHEDRAL) {
+    if (eflag & 2) {
+      thr->eatom_dihed = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_dihed = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (eflag_atom)
-      for (i = 0; i < ntotal; ++i)
-	eatom_thr[t][i] = 0.0;
-    
-    if (vflag_atom)
-      for (i = 0; i < ntotal; ++i) {
-        vatom_thr[t][i][0] = 0.0;
-        vatom_thr[t][i][1] = 0.0;
-        vatom_thr[t][i][2] = 0.0;
-        vatom_thr[t][i][3] = 0.0;
-        vatom_thr[t][i][4] = 0.0;
-        vatom_thr[t][i][5] = 0.0;
-      }
+  if (thr_style & THR_IMPROPER) {
+    if (eflag & 2) {
+      thr->eatom_imprp = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_imprp = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double));
+    }
   }
-}
 
-/* ---------------------------------------------------------------------- */
+#if 0 /* not supported (yet) */
+  if (thr_style & THR_KSPACE) {
+    if (eflag & 2) {
+      thr->eatom_kspce = eatom + tid*nall;
+      if (nall > 0) memset(&(thr->eatom_kspce[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_kspce = vatom + tid*nall;
+      if (nall > 0) memset(&(thr->vatom_kspce[0][0]),0,nall*6*sizeof(double));
+    }
+  }
+#endif
+}
 
-void ThrOMP::ev_setup_thr(Dihedral *dihed)
+/* ----------------------------------------------------------------------
+   Reduce per thread data into the regular structures.
+   Reduction of global properties is serialized with a "critical"
+   directive, so that only one thread at a time will access the
+   global variables. Since the threads are not synchronized before it,
+   this should come with little overhead. The reduction of per-atom
+   properties, in contrast, is parallelized over threads like the forces.
+   ---------------------------------------------------------------------- */
+
+void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
+			ThrData *const thr, const int nproxy)
 {
-  int nthreads = lmp->comm->nthreads;
+  const int nlocal = lmp->atom->nlocal;
+  const int nghost = lmp->atom->nghost;
+  const int nall = nlocal + nghost;
+  const int nfirst = lmp->atom->nfirst;
+  const int nthreads = lmp->comm->nthreads;
+  const int evflag = eflag | vflag;
+  
+  const int tid = thr->get_tid();
+  double **f = lmp->atom->f;
+  double **x = lmp->atom->x;
+
+  switch (thr_style) {
 
-  // reallocate per-atom arrays if necessary
-  if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
-    maxeatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
+  case THR_PAIR: {
+    Pair * const pair = lmp->force->pair;
+  
+    if (pair->vflag_fdotr) {
+      if (lmp->neighbor->includegroup == 0)
+	thr->virial_fdotr_compute(x, nlocal, nghost, -1);
+      else
+	thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
+    }
+
+    if (evflag) {
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (eflag & 1) {
+	  pair->eng_vdwl += thr->eng_vdwl;
+	  pair->eng_coul += thr->eng_coul;
+	  thr->eng_vdwl = 0.0;
+	  thr->eng_coul = 0.0;
+	}
+	if (vflag & 3)
+	  for (int i=0; i < 6; ++i) {
+	    pair->virial[i] += thr->virial_pair[i];
+	    thr->virial_pair[i] = 0.0;
+	  }
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
   }
-  if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
-    maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
+    break;
+
+  case THR_PAIR|THR_PROXY: {
+    Pair * const pair = lmp->force->pair;
+    
+    if (tid >= nproxy && pair->vflag_fdotr) {
+      if (lmp->neighbor->includegroup == 0)
+	thr->virial_fdotr_compute(x, nlocal, nghost, -1);
+      else
+	thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
+    }
+    
+    if (evflag) {
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (tid < nproxy) {
+	  // nothing to do for kspace?
+	  if (vflag & 3)
+	    for (int i=0; i < 6; ++i) {
+	      thr->virial_pair[i] = 0.0;
+	    }
+	} else {
+	  if (eflag & 1) {
+	    pair->eng_vdwl += thr->eng_vdwl;
+	    pair->eng_coul += thr->eng_coul;
+	    thr->eng_vdwl = 0.0;
+	    thr->eng_coul = 0.0;
+	  }
+	  if (vflag & 3)
+	    for (int i=0; i < 6; ++i) {
+	      pair->virial[i] += thr->virial_pair[i];
+	      thr->virial_pair[i] = 0.0;
+	    }
+	}
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
   }
+    break;
 
-  int ntotal = (lmp->force->newton_bond) ? 
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+  case THR_BOND:
 
-  // set up per thread accumulators
-  ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
-		   dihed->eflag_atom, dihed->vflag_atom, nthreads);
-}
+    if (evflag) {
+      Bond * const bond = lmp->force->bond;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	bond->energy += thr->eng_bond;
+	for (int i=0; i < 6; ++i)
+	  bond->virial[i] += thr->virial_bond[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-/* ---------------------------------------------------------------------- */
+  case THR_ANGLE:
 
-void ThrOMP::ev_setup_thr(Pair *pair)
-{
-  int nthreads = lmp->comm->nthreads;
+    if (evflag) {
+      Angle * const angle = lmp->force->angle;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	angle->energy += thr->eng_angle;
+	for (int i=0; i < 6; ++i)
+	  angle->virial[i] += thr->virial_angle[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-  // reallocate per-atom arrays if necessary
-  if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
-    maxeatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
-  }
-  if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
-    maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
-  }
+  case THR_DIHEDRAL:
+    
+    if (evflag) {
+      Dihedral * const dihedral = lmp->force->dihedral;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	dihedral->energy += thr->eng_dihed;
+	for (int i=0; i < 6; ++i)
+	  dihedral->virial[i] += thr->virial_dihed[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-  int ntotal = (lmp->force->newton) ?
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+  case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals
+
+    if (evflag) {
+      Dihedral * const dihedral = lmp->force->dihedral;
+      Pair * const pair = lmp->force->pair;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (eflag & 1) {
+	  dihedral->energy += thr->eng_dihed;
+	  pair->eng_vdwl += thr->eng_vdwl;
+	  pair->eng_coul += thr->eng_coul;
+	}
+
+	if (vflag & 3) {
+	  for (int i=0; i < 6; ++i) {
+	    dihedral->virial[i] += thr->virial_dihed[i];
+	    pair->virial[i] += thr->virial_pair[i];
+	  }
+	}
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
+
+  case THR_IMPROPER:
+
+    if (evflag) {
+      Improper *improper = lmp->force->improper;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	improper->energy += thr->eng_imprp;
+	for (int i=0; i < 6; ++i)
+	  improper->virial[i] += thr->virial_imprp[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
+
+  case THR_KSPACE|THR_PROXY: // fallthrough
+  case THR_KSPACE:
+    // nothing to do (for now)
+#if 0
+    if (evflag) {
+      KSpace *kspace = lmp->force->kspace;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	kspace->energy += thr->eng_kspce;
+	for (int i=0; i < 6; ++i)
+	  kspace->virial[i] += thr->virial_kspce[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(kspace->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(kspace->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+#endif
+    break;
 
-  // set up per thread accumulators
-  ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
-		   pair->eflag_atom, pair->vflag_atom, nthreads);
+  default:
+    printf("tid:%d unhandled thr_style case %d\n", tid, thr_style);
+    break;
+  }
+    
+  if (style == fix->last_omp_style) { // force/torque arrays are reduced only by the style matching fix->last_omp_style
+    sync_threads();
+    data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
+    if (lmp->atom->torque)
+      data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
+  }
 }
 
 /* ----------------------------------------------------------------------
-   reduce the per thread accumulated E/V data into the canonical accumulators.
+   tally eng_vdwl and eng_coul into per thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
-void ThrOMP::ev_reduce_thr(Dihedral *dihed)
+
+void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, 
+			 const int nlocal, const int newton_pair,
+			 const double evdwl, const double ecoul, ThrData * const thr)
 {
-  int nthreads = lmp->comm->nthreads;
-  int ntotal = (lmp->force->newton_bond) ?
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
-
-  for (int n = 0; n < nthreads; ++n) {
-    dihed->energy += eng_bond_thr[n];
-    if (dihed->vflag_either) {
-      dihed->virial[0] += virial_thr[n][0];
-      dihed->virial[1] += virial_thr[n][1];
-      dihed->virial[2] += virial_thr[n][2];
-      dihed->virial[3] += virial_thr[n][3];
-      dihed->virial[4] += virial_thr[n][4];
-      dihed->virial[5] += virial_thr[n][5];
-      if (dihed->vflag_atom) {
-        for (int i = 0; i < ntotal; ++i) {
-          dihed->vatom[i][0] += vatom_thr[n][i][0];
-          dihed->vatom[i][1] += vatom_thr[n][i][1];
-          dihed->vatom[i][2] += vatom_thr[n][i][2];
-          dihed->vatom[i][3] += vatom_thr[n][i][3];
-          dihed->vatom[i][4] += vatom_thr[n][i][4];
-          dihed->vatom[i][5] += vatom_thr[n][i][5];
-        }
+  if (pair->eflag_global) { // accumulate into this thread's eng_vdwl / eng_coul sums
+    if (newton_pair) { // the full evdwl and ecoul are tallied
+      thr->eng_vdwl += evdwl;
+      thr->eng_coul += ecoul;
+    } else { // i and j each add half, and only when their index is below nlocal
+      const double evdwlhalf = 0.5*evdwl;
+      const double ecoulhalf = 0.5*ecoul;
+      if (i < nlocal) {
+	thr->eng_vdwl += evdwlhalf;
+	thr->eng_coul += ecoulhalf;
       }
-    }
-    if (dihed->eflag_atom) {
-      for (int i = 0; i < ntotal; ++i) {
-        dihed->eatom[i] += eatom_thr[n][i];
+      if (j < nlocal) {
+	thr->eng_vdwl += evdwlhalf;
+	thr->eng_coul += ecoulhalf;
       }
     }
   }
+  if (pair->eflag_atom) { // per-atom tally: half of (evdwl + ecoul) goes to each of i and j
+    const double epairhalf = 0.5 * (evdwl + ecoul);
+    if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf;
+    if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf;
+  }
+}
+
+/* helper functions */
+// add the six entries of vin onto the matching entries of vout
+static void v_tally(double * const vout, const double * const vin)
+{
+  const double *src = vin;
+  double *dst = vout;
+  for (int n = 0; n < 6; ++n) {
+    dst[n] += src[n];
+  }
+}
+
+// add scale times each of the six entries of vin onto vout
+static void v_tally(double * const vout, const double scale, const double * const vin)
+{
+  const double *src = vin;
+  double *dst = vout;
+  for (int n = 0; n < 6; ++n) {
+    dst[n] += scale*src[n];
+  }
 }
 
 /* ----------------------------------------------------------------------
-   reduce the per thread accumulated E/V data into the canonical accumulators.
+   tally virial into per thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
-void ThrOMP::ev_reduce_thr(Pair *pair)
+void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, 
+			 const int nlocal, const int newton_pair,
+			 const double * const v, ThrData * const thr)
 {
-  const int nthreads = lmp->comm->nthreads;
-  const int ntotal = (lmp->force->newton) ? 
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
-
-  for (int n = 0; n < nthreads; ++n) {
-    pair->eng_vdwl += eng_vdwl_thr[n];
-    pair->eng_coul += eng_coul_thr[n];
-    if (pair->vflag_either) {
-      pair->virial[0] += virial_thr[n][0];
-      pair->virial[1] += virial_thr[n][1];
-      pair->virial[2] += virial_thr[n][2];
-      pair->virial[3] += virial_thr[n][3];
-      pair->virial[4] += virial_thr[n][4];
-      pair->virial[5] += virial_thr[n][5];
-      if (pair->vflag_atom) {
-        for (int i = 0; i < ntotal; ++i) {
-          pair->vatom[i][0] += vatom_thr[n][i][0];
-          pair->vatom[i][1] += vatom_thr[n][i][1];
-          pair->vatom[i][2] += vatom_thr[n][i][2];
-          pair->vatom[i][3] += vatom_thr[n][i][3];
-          pair->vatom[i][4] += vatom_thr[n][i][4];
-          pair->vatom[i][5] += vatom_thr[n][i][5];
-        }
-      }
+  if (pair->vflag_global) { // accumulate into this thread's virial_pair
+    double * const va = thr->virial_pair;
+    if (newton_pair) { // all six entries of v are tallied in full
+      v_tally(va,v);
+    } else { // i and j each add half of v, and only when their index is below nlocal
+      if (i < nlocal) v_tally(va,0.5,v);
+      if (j < nlocal) v_tally(va,0.5,v);
     }
-    if (pair->eflag_atom) {
-      for (int i = 0; i < ntotal; ++i) {
-        pair->eatom[i] += eatom_thr[n][i];
-      }
+  }
+
+  if (pair->vflag_atom) { // per-atom tally: half of v goes to each of i and j
+    if (newton_pair || i < nlocal) {
+      double * const va = thr->vatom_pair[i];
+      v_tally(va,0.5,v);
+    }
+    if (newton_pair || j < nlocal) {
+      double * const va = thr->vatom_pair[j];
+      v_tally(va,0.5,v);
     }
   }
 }
@@ -232,39 +500,17 @@ void ThrOMP::ev_reduce_thr(Pair *pair)
    need i < nlocal test since called by bond_quartic and dihedral_charmm
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
-			  int newton_pair, double evdwl, double ecoul,
-			  double fpair, double delx, double dely,
-			  double delz, int tid)
+void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal,
+			  const int newton_pair, const double evdwl, const double ecoul,
+			  const double fpair, const double delx, const double dely,
+			  const double delz, ThrData * const thr)
 {
-  double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
-  if (pair->eflag_either) {
-    if (pair->eflag_global) {
-      if (newton_pair) {
-	eng_vdwl_thr[tid] += evdwl;
-	eng_coul_thr[tid] += ecoul;
-      } else {
-	evdwlhalf = 0.5*evdwl;
-	ecoulhalf = 0.5*ecoul;
-	if (i < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-	if (j < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-      }
-    }
-    if (pair->eflag_atom) {
-      epairhalf = 0.5 * (evdwl + ecoul);
-      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
-      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
-    }
-  }
+  if (pair->eflag_either)
+    e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
 
   if (pair->vflag_either) {
+    double v[6];
     v[0] = delx*delx*fpair;
     v[1] = dely*dely*fpair;
     v[2] = delz*delz*fpair;
@@ -272,52 +518,7 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
     v[4] = delx*delz*fpair;
     v[5] = dely*delz*fpair;
 
-    if (pair->vflag_global) {
-      if (newton_pair) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
-      } else {
-	if (i < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-	if (j < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-      }
-    }
-
-    if (pair->vflag_atom) {
-      if (newton_pair || i < nlocal) {
-	vatom_thr[tid][i][0] += 0.5*v[0];
-	vatom_thr[tid][i][1] += 0.5*v[1];
-	vatom_thr[tid][i][2] += 0.5*v[2];
-	vatom_thr[tid][i][3] += 0.5*v[3];
-	vatom_thr[tid][i][4] += 0.5*v[4];
-	vatom_thr[tid][i][5] += 0.5*v[5];
-      }
-      if (newton_pair || j < nlocal) {
-	vatom_thr[tid][j][0] += 0.5*v[0];
-	vatom_thr[tid][j][1] += 0.5*v[1];
-	vatom_thr[tid][j][2] += 0.5*v[2];
-	vatom_thr[tid][j][3] += 0.5*v[3];
-	vatom_thr[tid][j][4] += 0.5*v[4];
-	vatom_thr[tid][j][5] += 0.5*v[5];
-      }
-    }
+    v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
   }
 }
 
@@ -326,39 +527,19 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
    for virial, have delx,dely,delz and fx,fy,fz
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
-			      int newton_pair, double evdwl, double ecoul,
-			      double fx, double fy, double fz,
-			      double delx, double dely, double delz, int tid)
+void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j,
+			      const int nlocal, const int newton_pair, 
+			      const double evdwl, const double ecoul,
+			      const double fx, const double fy, const double fz,
+			      const double delx, const double dely, const double delz,
+			      ThrData * const thr)
 {
-  double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
-  if (pair->eflag_either) {
-    if (pair->eflag_global) {
-      if (newton_pair) {
-	eng_vdwl_thr[tid] += evdwl;
-	eng_coul_thr[tid] += ecoul;
-      } else {
-	evdwlhalf = 0.5*evdwl;
-	ecoulhalf = 0.5*ecoul;
-	if (i < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-	if (j < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-      }
-    }
-    if (pair->eflag_atom) {
-      epairhalf = 0.5 * (evdwl + ecoul);
-      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
-      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
-    }
-  }
+  if (pair->eflag_either)
+    e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
 
   if (pair->vflag_either) {
+    double v[6];
     v[0] = delx*fx;
     v[1] = dely*fy;
     v[2] = delz*fz;
@@ -366,52 +547,7 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
     v[4] = delx*fz;
     v[5] = dely*fz;
 
-    if (pair->vflag_global) {
-      if (newton_pair) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
-      } else {
-	if (i < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-	if (j < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-      }
-    }
-
-    if (pair->vflag_atom) {
-      if (newton_pair || i < nlocal) {
-	vatom_thr[tid][i][0] += 0.5*v[0];
-	vatom_thr[tid][i][1] += 0.5*v[1];
-	vatom_thr[tid][i][2] += 0.5*v[2];
-	vatom_thr[tid][i][3] += 0.5*v[3];
-	vatom_thr[tid][i][4] += 0.5*v[4];
-	vatom_thr[tid][i][5] += 0.5*v[5];
-      }
-      if (newton_pair || j < nlocal) {
-	vatom_thr[tid][j][0] += 0.5*v[0];
-	vatom_thr[tid][j][1] += 0.5*v[1];
-	vatom_thr[tid][j][2] += 0.5*v[2];
-	vatom_thr[tid][j][3] += 0.5*v[3];
-	vatom_thr[tid][j][4] += 0.5*v[4];
-	vatom_thr[tid][j][5] += 0.5*v[5];
-      }
-    }
+    v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
   }
 }
 
@@ -421,25 +557,28 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
    virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double ecoul,
-			   double *fj, double *fk, double *drji, double *drki, int tid)
+void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k,
+			   const double evdwl, const double ecoul,
+			   const double * const fj, const double * const fk,
+			   const double * const drji, const double * const drki,
+			   ThrData * const thr)
 {
-  double epairthird,v[6];
-
   if (pair->eflag_either) {
     if (pair->eflag_global) {
-      eng_vdwl_thr[tid] += evdwl;
-      eng_coul_thr[tid] += ecoul;
+      thr->eng_vdwl += evdwl;
+      thr->eng_coul += ecoul;
     }
     if (pair->eflag_atom) {
-      epairthird = THIRD * (evdwl + ecoul);
-      eatom_thr[tid][i] += epairthird;
-      eatom_thr[tid][j] += epairthird;
-      eatom_thr[tid][k] += epairthird;
+      const double epairthird = THIRD * (evdwl + ecoul);
+      thr->eatom_pair[i] += epairthird;
+      thr->eatom_pair[j] += epairthird;
+      thr->eatom_pair[k] += epairthird;
     }
   }
 
   if (pair->vflag_either) {
+    double v[6];
+
     v[0] = drji[0]*fj[0] + drki[0]*fk[0];
     v[1] = drji[1]*fj[1] + drki[1]*fk[1];
     v[2] = drji[2]*fj[2] + drki[2]*fk[2];
@@ -447,21 +586,12 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double
     v[4] = drji[0]*fj[2] + drki[0]*fk[2];
     v[5] = drji[1]*fj[2] + drki[1]*fk[2];
       
-    if (pair->vflag_global) {
-      virial_thr[tid][0] += v[0];
-      virial_thr[tid][1] += v[1];
-      virial_thr[tid][2] += v[2];
-      virial_thr[tid][3] += v[3];
-      virial_thr[tid][4] += v[4];
-      virial_thr[tid][5] += v[5];
-    }
+    if (pair->vflag_global) v_tally(thr->virial_pair,v);
 
     if (pair->vflag_atom) {
-      for (int n=0; n < 6; ++n) {
-	vatom_thr[tid][i][n] += THIRD*v[n];
-	vatom_thr[tid][j][n] += THIRD*v[n];
-	vatom_thr[tid][k][n] += THIRD*v[n];
-      }
+      v_tally(thr->vatom_pair[i],THIRD,v);
+      v_tally(thr->vatom_pair[j],THIRD,v);
+      v_tally(thr->vatom_pair[k],THIRD,v);
     }
   }
 }
@@ -471,20 +601,23 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double
    called by AIREBO potential, newton_pair is always on
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
-			   double *fi, double *fj, double *fk,
-			   double *drim, double *drjm, double *drkm,int tid)
+void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j,
+			   const int k, const int m, const double evdwl,
+			   const double * const fi, const double * const fj,
+			   const double * const fk, const double * const drim,
+			   const double * const drjm, const double * const drkm,
+			   ThrData * const thr)
 {
-  double epairfourth,v[6];
+  double v[6];
 
   if (pair->eflag_either) {
-    if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl;
+    if (pair->eflag_global) thr->eng_vdwl += evdwl;
     if (pair->eflag_atom) {
-      epairfourth = 0.25 * evdwl;
-      eatom_thr[tid][i] += epairfourth;
-      eatom_thr[tid][j] += epairfourth;
-      eatom_thr[tid][k] += epairfourth;
-      eatom_thr[tid][m] += epairfourth;
+      const double epairfourth = 0.25 * evdwl;
+      thr->eatom_pair[i] += epairfourth;
+      thr->eatom_pair[j] += epairfourth;
+      thr->eatom_pair[k] += epairfourth;
+      thr->eatom_pair[m] += epairfourth;
     }
   }
 
@@ -496,14 +629,10 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
     v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
     v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
     
-    vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-    vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-    vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-    vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-    vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-    vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
-    vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2];
-    vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
+    v_tally(thr->vatom_pair[i],v);
+    v_tally(thr->vatom_pair[j],v);
+    v_tally(thr->vatom_pair[k],v);
+    v_tally(thr->vatom_pair[m],v);
   }
 }
 
@@ -513,48 +642,248 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
    changes v values by dividing by n
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid)
+void ThrOMP::ev_tally_list_thr(Pair * const pair, const int n,
+			       const int * const list, const double ecoul,
+			       const double * const v, ThrData * const thr)
 {
-  int i,j;
-
   if (pair->eflag_either) {
-    if (pair->eflag_global) eng_coul_thr[tid] += ecoul;
+    if (pair->eflag_global) thr->eng_coul += ecoul;  // global tally gets the full ecoul
     if (pair->eflag_atom) {
-      double epairatom = ecoul/n;
-      for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom;
+      double epairatom = ecoul/static_cast<double>(n);  // equal share for each of the n listed atoms
+      for (int i = 0; i < n; i++) thr->eatom_pair[list[i]] += epairatom;
     }
   }
 
   if (pair->vflag_either) {
-    if (pair->vflag_global) {
-      virial_thr[tid][0] += v[0];
-      virial_thr[tid][1] += v[1];
-      virial_thr[tid][2] += v[2];
-      virial_thr[tid][3] += v[3];
-      virial_thr[tid][4] += v[4];
-      virial_thr[tid][5] += v[5];
-    }
+    if (pair->vflag_global)
+      v_tally(thr->virial_pair,v);  // unscaled v goes into the global virial
 
     if (pair->vflag_atom) {
-      v[0] /= n;
-      v[1] /= n;
-      v[2] /= n;
-      v[3] /= n;
-      v[4] /= n;
-      v[5] /= n;
-      for (i = 0; i < n; i++) {
-	j = list[i];
-	vatom_thr[tid][j][0] += v[0];
-	vatom_thr[tid][j][1] += v[1];
-	vatom_thr[tid][j][2] += v[2];
-	vatom_thr[tid][j][3] += v[3];
-	vatom_thr[tid][j][4] += v[4];
-	vatom_thr[tid][j][5] += v[5];
+      const double s = 1.0/static_cast<double>(n);  // per-atom scale factor 1/n
+      double vtmp[6];  // scaled local copy; v is const and is not modified here
+
+      vtmp[0] = s * v[0];
+      vtmp[1] = s * v[1];
+      vtmp[2] = s * v[2];
+      vtmp[3] = s * v[3];
+      vtmp[4] = s * v[4];
+      vtmp[5] = s * v[5];
+
+      for (int i = 0; i < n; i++) {
+	const int j = list[i];  // atom index taken from the caller's list
+	v_tally(thr->vatom_pair[j],vtmp);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally bond energy and virial into per-thread global and per-atom accumulators
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
+			  const int newton_bond, const double ebond, const double fbond,
+			  const double delx, const double dely, const double delz,
+			  ThrData * const thr)
+{
+  if (bond->eflag_either) {
+    const double ebondhalf = 0.5*ebond;  // share of the bond energy per atom
+    if (newton_bond) {
+      if (bond->eflag_global)
+	thr->eng_bond += ebond;
+      if (bond->eflag_atom) {
+	thr->eatom_bond[i] += ebondhalf;
+	thr->eatom_bond[j] += ebondhalf;
+      }
+    } else {  // newton_bond off: only atoms with index < nlocal are tallied
+      if (bond->eflag_global) {
+	if (i < nlocal) thr->eng_bond += ebondhalf;
+	if (j < nlocal) thr->eng_bond += ebondhalf;
+      }
+      if (bond->eflag_atom) {
+	if (i < nlocal) thr->eatom_bond[i] += ebondhalf;
+	if (j < nlocal) thr->eatom_bond[j] += ebondhalf;
+      }
+    }
+  }
+
+  if (bond->vflag_either) {
+    double v[6];  // virial components in the order xx,yy,zz,xy,xz,yz
+
+    v[0] = delx*delx*fbond;
+    v[1] = dely*dely*fbond;
+    v[2] = delz*delz*fbond;
+    v[3] = delx*dely*fbond;
+    v[4] = delx*delz*fbond;
+    v[5] = dely*delz*fbond;
+
+    if (bond->vflag_global) {
+      if (newton_bond)
+	v_tally(thr->virial_bond,v);
+      else {
+	if (i < nlocal)
+	  v_tally(thr->virial_bond,0.5,v);
+	if (j < nlocal)
+	  v_tally(thr->virial_bond,0.5,v);
+      }
+    }
+
+    if (bond->vflag_atom) {
+      v[0] *= 0.5;  // from here on v holds the per-atom half share
+      v[1] *= 0.5;
+      v[2] *= 0.5;
+      v[3] *= 0.5;
+      v[4] *= 0.5;
+      v[5] *= 0.5;
+
+      if (newton_bond) {
+	v_tally(thr->vatom_bond[i],v);
+	v_tally(thr->vatom_bond[j],v);
+      } else {
+	if (i < nlocal)  // guard atom i by its own index, as in the energy tally above
+	  v_tally(thr->vatom_bond[i],v);
+	if (j < nlocal)
+	  v_tally(thr->vatom_bond[j],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally angle energy and virial into per-thread global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
+			  const int nlocal, const int newton_bond, const double eangle,
+			  const double * const f1, const double * const f3,
+			  const double delx1, const double dely1, const double delz1,
+			  const double delx2, const double dely2, const double delz2,
+			  ThrData * const thr)
+{
+  if (angle->eflag_either) {
+    const double eanglethird = THIRD*eangle;  // share of the angle energy per atom
+    if (newton_bond) {
+      if (angle->eflag_global)
+	thr->eng_angle += eangle;
+      if (angle->eflag_atom) {
+	thr->eatom_angle[i] += eanglethird;
+	thr->eatom_angle[j] += eanglethird;
+	thr->eatom_angle[k] += eanglethird;
       }
+    } else {  // newton_bond off: only atoms with index < nlocal are tallied
+      if (angle->eflag_global) {
+	if (i < nlocal) thr->eng_angle += eanglethird;
+	if (j < nlocal) thr->eng_angle += eanglethird;
+	if (k < nlocal) thr->eng_angle += eanglethird;
+      }
+      if (angle->eflag_atom) {
+	if (i < nlocal) thr->eatom_angle[i] += eanglethird;
+	if (j < nlocal) thr->eatom_angle[j] += eanglethird;
+	if (k < nlocal) thr->eatom_angle[k] += eanglethird;
+      }
+    }
+  }
+
+  if (angle->vflag_either) {
+    double v[6];  // virial components in the order xx,yy,zz,xy,xz,yz
+
+    v[0] = delx1*f1[0] + delx2*f3[0];
+    v[1] = dely1*f1[1] + dely2*f3[1];
+    v[2] = delz1*f1[2] + delz2*f3[2];
+    v[3] = delx1*f1[1] + delx2*f3[1];
+    v[4] = delx1*f1[2] + delx2*f3[2];
+    v[5] = dely1*f1[2] + dely2*f3[2];
+
+    if (angle->vflag_global) {
+      if (newton_bond) {
+	v_tally(thr->virial_angle,v);
+      } else {
+	int cnt = 0;  // number of the three atoms with index < nlocal
+	if (i < nlocal) ++cnt;
+	if (j < nlocal) ++cnt;
+	if (k < nlocal) ++cnt;
+	v_tally(thr->virial_angle,cnt*THIRD,v);
+      }
+    }
+
+    if (angle->vflag_atom) {
+      v[0] *= THIRD;  // from here on v holds the per-atom third share
+      v[1] *= THIRD;
+      v[2] *= THIRD;
+      v[3] *= THIRD;
+      v[4] *= THIRD;
+      v[5] *= THIRD;
+
+      if (newton_bond) {
+	v_tally(thr->vatom_angle[i],v);
+	v_tally(thr->vatom_angle[j],v);
+	v_tally(thr->vatom_angle[k],v);
+      } else {
+	if (i < nlocal) v_tally(thr->vatom_angle[i],v);  // guard atom i by its own index
+	if (j < nlocal) v_tally(thr->vatom_angle[j],v);
+	if (k < nlocal) v_tally(thr->vatom_angle[k],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial from 1-3 repulsion of SDK angle into accumulators
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
+			    const int nlocal, const int newton_bond,
+			    const double epair, const double fpair,
+			    const double delx, const double dely,
+			    const double delz, ThrData * const thr)
+{
+
+  if (angle->eflag_either) {
+    const double epairhalf = 0.5 * epair;  // half of the pair energy for each of i1 and i3
+
+    if (angle->eflag_global) {
+      if (newton_bond || i1 < nlocal)  // without newton_bond only indices < nlocal count
+	thr->eng_angle += epairhalf;
+      if (newton_bond || i3 < nlocal)
+	thr->eng_angle += epairhalf;
+    }
+
+    if (angle->eflag_atom) {
+      if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf;
+      if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf;
     }
   }
+  
+  if (angle->vflag_either) {
+    double v[6];  // virial components in the order xx,yy,zz,xy,xz,yz
+    v[0] = delx*delx*fpair;
+    v[1] = dely*dely*fpair;
+    v[2] = delz*delz*fpair;
+    v[3] = delx*dely*fpair;
+    v[4] = delx*delz*fpair;
+    v[5] = dely*delz*fpair;
+
+    if (angle->vflag_global) {
+      double * const va = thr->virial_angle;
+      if (newton_bond || i1 < nlocal) v_tally(va,0.5,v);  // two half tallies add up to the full v
+      if (newton_bond || i3 < nlocal) v_tally(va,0.5,v);
+    }
+
+    if (angle->vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+	double * const va = thr->vatom_angle[i1];  // per-atom virial row of atom i1
+	v_tally(va,0.5,v);
+      }
+      if (newton_bond || i3 < nlocal) {
+	double * const va = thr->vatom_angle[i3];  // per-atom virial row of atom i3
+	v_tally(va,0.5,v);
+      }
+    }
+  }  
 }
 
+
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
@@ -562,40 +891,48 @@ void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, doubl
 	  = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
-			  int nlocal, int newton_bond,
-			  double edihedral, double *f1, double *f3, double *f4,
-			  double vb1x, double vb1y, double vb1z,
-			  double vb2x, double vb2y, double vb2z,
-			  double vb3x, double vb3y, double vb3z, int tid)
+void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2,
+			  const int i3, const int i4, const int nlocal,
+			  const int newton_bond, const double edihedral,
+			  const double * const f1, const double * const f3,
+			  const double * const f4, const double vb1x,
+			  const double vb1y, const double vb1z, const double vb2x,
+			  const double vb2y, const double vb2z, const double vb3x,
+			  const double vb3y, const double vb3z, ThrData * const thr)
 {
-  double edihedralquarter,v[6];
-  int cnt;
 
   if (dihed->eflag_either) {
     if (dihed->eflag_global) {
       if (newton_bond) {
-	eng_bond_thr[tid] += edihedral;
+	thr->eng_dihed += edihedral;
       } else {
-	edihedralquarter = 0.25*edihedral;
-	cnt = 0;
+	const double edihedralquarter = 0.25*edihedral;
+	int cnt = 0;
 	if (i1 < nlocal) ++cnt;
 	if (i2 < nlocal) ++cnt;
 	if (i3 < nlocal) ++cnt;
 	if (i4 < nlocal) ++cnt;
-	eng_bond_thr[tid] += static_cast<double>(cnt) * edihedralquarter;
+	thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter;
       }
     }
     if (dihed->eflag_atom) {
-      edihedralquarter = 0.25*edihedral;
-      if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter;
-      if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter;
-      if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter;
-      if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter;
+      const double edihedralquarter = 0.25*edihedral;
+      if (newton_bond) {
+	thr->eatom_dihed[i1] += edihedralquarter;
+	thr->eatom_dihed[i2] += edihedralquarter;
+	thr->eatom_dihed[i3] += edihedralquarter;
+	thr->eatom_dihed[i4] += edihedralquarter;
+      } else {
+	if (i1 < nlocal) thr->eatom_dihed[i1] +=  edihedralquarter;
+	if (i2 < nlocal) thr->eatom_dihed[i2] +=  edihedralquarter;
+	if (i3 < nlocal) thr->eatom_dihed[i3] +=  edihedralquarter;
+	if (i4 < nlocal) thr->eatom_dihed[i4] +=  edihedralquarter;
+      }
     }
   }
 
   if (dihed->vflag_either) {
+    double v[6];
     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
@@ -605,80 +942,127 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
 
     if (dihed->vflag_global) {
       if (newton_bond) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
+	v_tally(thr->virial_dihed,v);
       } else {
-	if (i1 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i2 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i3 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i4 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v);
       }
     }
 
+    v[0] *= 0.25;
+    v[1] *= 0.25;
+    v[2] *= 0.25;
+    v[3] *= 0.25;
+    v[4] *= 0.25;
+    v[5] *= 0.25;
+    
     if (dihed->vflag_atom) {
-      if (newton_bond || i1 < nlocal) {
-	vatom_thr[tid][i1][0] += 0.25*v[0];
-	vatom_thr[tid][i1][1] += 0.25*v[1];
-	vatom_thr[tid][i1][2] += 0.25*v[2];
-	vatom_thr[tid][i1][3] += 0.25*v[3];
-	vatom_thr[tid][i1][4] += 0.25*v[4];
-	vatom_thr[tid][i1][5] += 0.25*v[5];
-      }
-      if (newton_bond || i2 < nlocal) {
-	vatom_thr[tid][i2][0] += 0.25*v[0];
-	vatom_thr[tid][i2][1] += 0.25*v[1];
-	vatom_thr[tid][i2][2] += 0.25*v[2];
-	vatom_thr[tid][i2][3] += 0.25*v[3];
-	vatom_thr[tid][i2][4] += 0.25*v[4];
-	vatom_thr[tid][i2][5] += 0.25*v[5];
+      if (newton_bond) {
+	v_tally(thr->vatom_dihed[i1],v);
+	v_tally(thr->vatom_dihed[i2],v);
+	v_tally(thr->vatom_dihed[i3],v);
+	v_tally(thr->vatom_dihed[i4],v);
+      } else {
+	if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v);
+	if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v);
+	if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v);
+	if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v);
       }
-      if (newton_bond || i3 < nlocal) {
-	vatom_thr[tid][i3][0] += 0.25*v[0];
-	vatom_thr[tid][i3][1] += 0.25*v[1];
-	vatom_thr[tid][i3][2] += 0.25*v[2];
-	vatom_thr[tid][i3][3] += 0.25*v[3];
-	vatom_thr[tid][i3][4] += 0.25*v[4];
-	vatom_thr[tid][i3][5] += 0.25*v[5];
-      }
-      if (newton_bond || i4 < nlocal) {
-	vatom_thr[tid][i4][0] += 0.25*v[0];
-	vatom_thr[tid][i4][1] += 0.25*v[1];
-	vatom_thr[tid][i4][2] += 0.25*v[2];
-	vatom_thr[tid][i4][3] += 0.25*v[3];
-	vatom_thr[tid][i4][4] += 0.25*v[4];
-	vatom_thr[tid][i4][5] += 0.25*v[5];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
+          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
+	  = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2,
+			  const int i3, const int i4, const int nlocal,
+			  const int newton_bond, const double eimproper,
+			  const double * const f1, const double * const f3,
+			  const double * const f4, const double vb1x,
+			  const double vb1y, const double vb1z, const double vb2x,
+			  const double vb2y, const double vb2z, const double vb3x,
+			  const double vb3y, const double vb3z, ThrData * const thr)
+{
+
+  if (imprp->eflag_either) {
+    if (imprp->eflag_global) {
+      if (newton_bond) {
+	thr->eng_imprp += eimproper;
+      } else {
+	const double eimproperquarter = 0.25*eimproper;
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter;
+      }
+    }
+    if (imprp->eflag_atom) {
+      const double eimproperquarter = 0.25*eimproper;
+      if (newton_bond) {
+	thr->eatom_imprp[i1] += eimproperquarter;
+	thr->eatom_imprp[i2] += eimproperquarter;
+	thr->eatom_imprp[i3] += eimproperquarter;
+	thr->eatom_imprp[i4] += eimproperquarter;
+      } else {
+	if (i1 < nlocal) thr->eatom_imprp[i1] +=  eimproperquarter;
+	if (i2 < nlocal) thr->eatom_imprp[i2] +=  eimproperquarter;
+	if (i3 < nlocal) thr->eatom_imprp[i3] +=  eimproperquarter;
+	if (i4 < nlocal) thr->eatom_imprp[i4] +=  eimproperquarter;
+      }
+    }
+  }
+
+  if (imprp->vflag_either) {
+    double v[6];
+    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
+    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
+    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
+    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
+    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
+    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
+
+    if (imprp->vflag_global) {
+      if (newton_bond) {
+	v_tally(thr->virial_imprp,v);
+      } else {
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v);
+      }
+    }
+
+    v[0] *= 0.25;
+    v[1] *= 0.25;
+    v[2] *= 0.25;
+    v[3] *= 0.25;
+    v[4] *= 0.25;
+    v[5] *= 0.25;
+    
+    if (imprp->vflag_atom) {
+      if (newton_bond) {
+	v_tally(thr->vatom_imprp[i1],v);
+	v_tally(thr->vatom_imprp[i2],v);
+	v_tally(thr->vatom_imprp[i3],v);
+	v_tally(thr->vatom_imprp[i4],v);
+      } else {
+	if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v);
+	if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v);
+	if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v);
+	if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v);
       }
     }
   }
@@ -690,7 +1074,8 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
    fpair is magnitude of force on atom I
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
+void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair,
+			  const double * const drij, ThrData * const thr)
 {
   double v[6];
   
@@ -701,10 +1086,8 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
   v[4] = 0.5 * drij[0]*drij[2]*fpair;
   v[5] = 0.5 * drij[1]*drij[2]*fpair;
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
+  v_tally(thr->vatom_pair[i],v);
+  v_tally(thr->vatom_pair[j],v);
 }
 
 /* ----------------------------------------------------------------------
@@ -712,8 +1095,10 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
    called by AIREBO and Tersoff potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
-			  double *drik, double *drjk, int tid)
+void ThrOMP::v_tally3_thr(const int i, const int j, const int k,
+			  const double * const fi, const double * const fj,
+			  const double * const drik, const double * const drjk,
+			  ThrData * const thr)
 {
   double v[6];
   
@@ -724,12 +1109,9 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
   v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]);
   v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]);
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-  vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-  vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
+  v_tally(thr->vatom_pair[i],v);
+  v_tally(thr->vatom_pair[j],v);
+  v_tally(thr->vatom_pair[k],v);
 }
 
 /* ----------------------------------------------------------------------
@@ -737,9 +1119,11 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
    called by AIREBO potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally4_thr(int i, int j, int k, int m,
-			  double *fi, double *fj, double *fk,
-			  double *drim, double *drjm, double *drkm, int tid)
+void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m,
+			  const double * const fi, const double * const fj,
+			  const double * const fk, const double * const drim,
+			  const double * const drjm, const double * const drkm,
+			  ThrData * const thr)
 {
   double v[6];
 
@@ -750,84 +1134,17 @@ void ThrOMP::v_tally4_thr(int i, int j, int k, int m,
   v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
   v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-  vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-  vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
-  vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2];
-  vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
-}
-
-/* ---------------------------------------------------------------------- */
-
-// set loop range thread id, and force array offset for threaded runs.
-double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid,
-				int inum, int nall, int nthreads)
-{
-#if defined(_OPENMP)
-  tid = omp_get_thread_num();
-
-  // each thread works on a fixed chunk of atoms.
-  const int idelta = 1 + inum/nthreads;
-  ifrom = tid*idelta;
-  ito   = ifrom + idelta;
-  if (ito > inum)
-    ito = inum;
-
-  return f + nall*tid;
-#else
-  tid = 0;
-  ifrom = 0;
-  ito = inum;
-  return f;
-#endif
-}
-
-/* ---------------------------------------------------------------------- */
-
-// reduce per thread data into the first part of the data
-// array that is used for the non-threaded parts and reset
-// the temporary storage to 0.0. this routine depends on
-// multi-dimensional arrays like force stored in this order
-// x1,y1,z1,x2,y2,z2,...
-// we need to post a barrier to wait until all threads are done
-// with writing to the array .
-void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads,
-			     int ndim, int tid)
-{
-#if defined(_OPENMP)
-  // NOOP in non-threaded execution.
-  if (nthreads == 1) return;
-#pragma omp barrier
-  {
-    const int nvals = ndim*nall;
-    const int idelta = nvals/nthreads + 1;
-    const int ifrom = tid*idelta;
-    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
-
-    for (int m = ifrom; m < ito; ++m) {
-      for (int n = 1; n < nthreads; ++n) {
-	dall[m] += dall[n*nvals + m];
-	dall[n*nvals + m] = 0.0;
-      }
-    }
-  }
-#else
-  // NOOP in non-threaded execution.
-  return;
-#endif
+  v_tally(thr->vatom_pair[i],v);
+  v_tally(thr->vatom_pair[j],v);
+  v_tally(thr->vatom_pair[k],v);
+  v_tally(thr->vatom_pair[m],v);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrOMP::memory_usage_thr() 
 {
-  const int nthreads=lmp->comm->nthreads;
-
-  double bytes = nthreads * (3 + 7) * sizeof(double);
-  bytes += nthreads * maxeatom_thr * sizeof(double);
-  bytes += nthreads * maxvatom_thr * 6 * sizeof(double);
+  double bytes=0.0;  // nothing is counted here now; NOTE(review): presumably the per-thread storage is reported elsewhere - confirm
+  
   return bytes;
 }
diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h
index 9966c9de00..32f7045124 100644
--- a/src/USER-OMP/thr_omp.h
+++ b/src/USER-OMP/thr_omp.h
@@ -19,39 +19,28 @@
 #define LMP_THR_OMP_H
 
 #include "pointers.h"
+#include "fix_omp.h"
+#include "thr_data.h"
 
 namespace LAMMPS_NS {
 
 // forward declarations
 class Pair;
+class Bond;
+class Angle;
 class Dihedral;
+class Improper;
+class KSpace;
+class Fix;
 
 class ThrOMP {
- public:
-  struct global {
-    double eng_vdwl;
-    double eng_coul;
-    double eng_bond;
-    double virial[6];
-  };
 
  protected:
-  const int thr_style;
-  enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE};
-
-  LAMMPS *lmp;           // reference to base lammps object.
-
-  double *eng_vdwl_thr;  // per thread accumulated vdw energy
-  double *eng_coul_thr;  // per thread accumulated coulomb energies
-  double *eng_bond_thr;  // per thread accumlated bonded energy
+  LAMMPS *lmp; // reference to base lammps object.
+  FixOMP *fix; // pointer to fix_omp;
 
-  double **virial_thr;   // per thread virial
-  double **eatom_thr;    // per thread per atom energy
-  double ***vatom_thr;   // per thread per atom virial
+  const int thr_style;
 
-  int maxeatom_thr, maxvatom_thr;
-  int evflag_global, evflag_atom;
-  
  public:
   ThrOMP(LAMMPS *, int);
   virtual ~ThrOMP();
@@ -65,50 +54,105 @@ class ThrOMP {
       { ; }
     };
 
+  enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2,
+	THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5,
+	THR_CHARMM=1<<6,THR_PROXY=1<<7,THR_HYBRID=1<<8,THR_FIX=1<<9};
+
  protected:
-  // extra ev_tally work for threaded styles
-  void ev_setup_thr(Pair *);
-  void ev_setup_thr(Dihedral *);
+  // extra ev_tally setup work for threaded styles
+  void ev_setup_thr(int, int, int, double *, double **, ThrData *);
 
-  void ev_reduce_thr(Pair *);
-  void ev_reduce_thr(Dihedral *);
+  // compute global per thread virial contribution from per-thread force
+  void virial_fdotr_compute_thr(double * const, const double * const * const, 
+				const double * const * const,
+				const int, const int, const int);
 
- private:
-  // internal method to be used by multiple ev_setup_thr() methods
-  void ev_setup_acc_thr(int, int, int, int, int, int);
+  // reduce per thread data as needed
+  void reduce_thr(void * const style, const int eflag, const int vflag, ThrData * const thr, const int nproxy=0);
 
  protected:
+
   // threading adapted versions of the ev_tally infrastructure
   // style specific versions (need access to style class flags)
-  void ev_tally_thr(Pair *, int, int, int, int, double, double,
-		    double, double, double, double, int);
-  void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double,
-			double, double, double, double, double, double, int);
-  void ev_tally3_thr(Pair *, int, int, int, double, double,
-		     double *, double *, double *, double *, int);
-  void ev_tally4_thr(Pair *, int, int, int, int, double, 
-		     double *, double *, double *,
-		     double *, double *, double *, int);
-  void ev_tally_list_thr(Pair *, int, int *, double , double *, int);
-
-  void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double,
-		    double *, double *, double *, double, double, double,
-		    double, double, double, double, double, double, int);
 
-  // style independent versions
-  void v_tally2_thr(int, int, double, double *, int);
-  void v_tally3_thr(int, int, int, double *, double *, double *, double *, int);
-  void v_tally4_thr(int, int, int, int, double *, double *, double *,
-		    double *, double *, double *, int);
+  // Pair
+  void e_tally_thr(Pair * const, const int, const int, const int,
+		   const int, const double, const double, ThrData * const);
+  void v_tally_thr(Pair * const, const int, const int, const int,
+		   const int, const double * const, ThrData * const);
+
+  void ev_tally_thr(Pair * const, const int, const int, const int, const int,
+		    const double, const double, const double, const double,
+		    const double, const double, ThrData * const);
+  void ev_tally_xyz_thr(Pair * const, const int, const int, const int,
+			const int, const double, const double, const double,
+			const double, const double, const double,
+			const double, const double, ThrData * const);
+  void ev_tally3_thr(Pair * const, const int, const int, const int, const double,
+		     const double, const double * const, const double * const,
+		     const double * const, const double * const, ThrData * const);
+  void ev_tally4_thr(Pair * const, const int, const int, const int, const int,
+		     const double, const double * const, const double * const,
+		     const double * const, const double * const, const double * const,
+		     const double * const, ThrData * const);
+
+  // Bond
+  void ev_tally_thr(Bond * const, const int, const int, const int, const int,
+		    const double, const double, const double, const double,
+		    const double, ThrData * const);
+
+  // Angle
+  void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int,
+		    const double, const double * const, const double * const,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const thr);
+  void ev_tally13_thr(Angle * const, const int, const int, const int, const int,
+		      const double, const double, const double, const double,
+		      const double, ThrData * const thr);
+
+  // Dihedral
+  void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int,
+		    const int, const double, const double * const, const double * const,
+		    const double * const, const double, const double, const double,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const);
+
+  // Improper
+  void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int,
+		    const int, const double, const double * const, const double * const,
+		    const double * const, const double, const double, const double,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const);
 
- protected:
-  // set loop range, thread id, and force array offset for threaded runs.
-  double **loop_setup_thr(double **, int &, int &, int &, int, int, int);
-
-  // reduce per thread data into the first part of the array
-  void data_reduce_thr(double *, int, int, int, int);
+  // style independent versions
+  void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const);
+  void v_tally3_thr(const int, const int, const int, const double * const, const double * const,
+		    const double * const, const double * const, ThrData * const);
+  void v_tally4_thr(const int, const int, const int, const int, const double * const,
+		    const double * const, const double * const, const double * const,
+		    const double * const, const double * const, ThrData * const);
+  void ev_tally_list_thr(Pair * const, const int, const int * const,
+			 const double , const double * const , ThrData * const);
 
 };
 
+// set loop range (ifrom, ito capped at inum) and thread id for threaded runs.
+static inline void loop_setup_thr(int &ifrom, int &ito, int &tid,
+				  int inum, int nthreads, int nproxy=0)
+{
+#if defined(_OPENMP)
+  tid = omp_get_thread_num();
+
+  // fixed chunk per thread, offset by nproxy thread ids. NOTE(review): tid < nproxy yields ifrom < 0 and nthreads == nproxy divides by zero; confirm callers exclude both.
+  const int idelta = 1 + inum/(nthreads-nproxy);
+  ifrom = (tid-nproxy)*idelta;
+  ito   = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; // never run past inum
+#else
+  tid = 0;       // without OpenMP: one thread with id 0 ...
+  ifrom = 0;     // ... covering the full range
+  ito = inum;
+#endif
+}
+
 }
 #endif
-- 
GitLab