diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh
index db0beb52181f60d1a5180aca29a97ea927ef437b..ec6fac4b92e2ed87c42320b8a63d102e19fe7fb5 100644
--- a/src/USER-OMP/Install.sh
+++ b/src/USER-OMP/Install.sh
@@ -1,10 +1,11 @@
 # Install/unInstall package files in LAMMPS
 # do not install child files if parent does not exist
 
-for file in *_omp.cpp *_omp.h; do
+for file in *_omp.cpp *_omp.h  pppm*proxy.h pppm*proxy.cpp; do
     # let us see if the "rain man" can count the toothpicks...
-   ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
-
+   ofile=`echo $file | sed  -e s,_pppm_tip4p_omp,_long_tip4p_omp, \
+   -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \
+   -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
   if (test $1 = 1) then
     if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
       :  # always install those files.
@@ -18,3 +19,15 @@ for file in *_omp.cpp *_omp.h; do
     rm -f ../$file
   fi
 done
+
+if (test $1 = 1) then
+
+  cp thr_data.h ..
+  cp thr_data.cpp ..
+
+elif (test $1 = 0) then
+
+  rm -f ../thr_data.h
+  rm -f ../thr_data.cpp
+
+fi
diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh
index 5a004c918709ca6dfc2bf7d98dc5e5699fd0a26e..6f577b2791653f29b646e3881821eccf89352954 100644
--- a/src/USER-OMP/Package.sh
+++ b/src/USER-OMP/Package.sh
@@ -1,22 +1,40 @@
 # Update package files in LAMMPS
-# cp package file to src if doesn't exist or is different
-# do not copy certain files if non-OMP versions do not exist
-# do remove OpenMP style files that have no matching
-#   non-OpenMP version installed, e.g. after a package has been removed
-
-for file in *_omp.cpp *_omp.h; do
+# copy package file to src if it doesn't exist or is different
+# do not copy OpenMP style files if a non-OpenMP version does
+# not exist. Do remove OpenMP style files that have no matching
+# non-OpenMP version installed, e.g. after a package has been
+# removed
+for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp thr_data.h thr_data.cpp; do
   # let us see if the "rain man" can count the toothpicks...
-  ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
-  if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
-    :  # always check for those files.
+   ofile=`echo $file | sed  -e s,_pppm_tip4p_omp,_long_tip4p_omp, \
+   -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \
+   -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
+  if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \
+      || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then
+    if (test ! -e ../$file) then
+      echo "  creating src/$file"
+      cp $file ..
+    elif ! cmp -s $file ../$file ; then
+      echo "  updating src/$file"
+      cp $file ..
+    fi
   elif (test ! -e ../$ofile) then
     if (test -e ../$file) then
       echo "  removing src/$file"
       rm -f ../$file
     fi
-    continue
+  else
+    if (test ! -e ../$file) then
+      echo "  creating src/$file"
+      cp $file ..
+    elif ! cmp -s $file ../$file ; then
+      echo "  updating src/$file"
+      cp $file ..
+    fi
   fi
+done
 
+for file in thr_data.h thr_data.cpp; do
   if (test ! -e ../$file) then
     echo "  creating src/$file"
     cp $file ..
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp
index 63bfc432700b40badc2fee8bb127c8be81415f95..b4d7e2e4adcb854f9f048455a49922eb0b75b5f6 100644
--- a/src/USER-OMP/dihedral_charmm_omp.cpp
+++ b/src/USER-OMP/dihedral_charmm_omp.cpp
@@ -40,7 +40,6 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   // insure pair->ev_tally() will use 1-4 virial contribution
@@ -53,43 +52,34 @@ void DihedralCharmmOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
-  // reduce contributions to non-bonded energy terms
-  for (int n = 0; n < nthreads; ++n) {
-    force->pair->eng_vdwl += eng_vdwl_thr[n];
-    force->pair->eng_coul += eng_coul_thr[n];
-  }
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralCharmmOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,m,n,type;
@@ -105,12 +95,13 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *atomtype = atom->type;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const atomtype = atom->type;
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const double qqrd2e = force->qqrd2e;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -176,7 +167,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -282,7 +273,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
     // 1-4 LJ and Coulomb interactions
     // tally energy/virial in pair, using newton_bond as newton flag
 
@@ -321,7 +312,7 @@ void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND,
-			       evdwl,ecoul,fpair,delx,dely,delz,tid);
+			       evdwl,ecoul,fpair,delx,dely,delz,thr);
     }
   }
 }
diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h
index a39ad83f7e7e923e186890d42fbb0b556fb845bf..75ba6410d55d942af6a8435d9b4411699c1cb2a7 100644
--- a/src/USER-OMP/dihedral_charmm_omp.h
+++ b/src/USER-OMP/dihedral_charmm_omp.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
@@ -33,13 +33,13 @@ class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP {
 
  public:
     DihedralCharmmOMP(class LAMMPS *lmp) : 
-      DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralCharmm(lmp), ThrOMP(lmp,THR_DIHEDRAL|THR_CHARMM) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp
index 7348296644cf0310d855de46bc50bb5dae8f1769..07e0fba6e117fab9646f4be98f4b64d30461f502 100644
--- a/src/USER-OMP/dihedral_class2_omp.cpp
+++ b/src/USER-OMP/dihedral_class2_omp.cpp
@@ -39,7 +39,6 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralClass2OMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,j,k,n,type;
@@ -96,9 +92,10 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -170,7 +167,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     sc2 = sqrt(sin2);
     if (sc2 < SMALL) sc2 = SMALL;
     sc2 = 1.0/sc2;
-          
+
     s1 = sc1 * sc1;
     s2 = sc2 * sc2;
     s12 = sc1 * sc2;
@@ -179,12 +176,12 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     // error check
 
     if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
-      int me;
-      MPI_Comm_rank(world,&me);
+      int me = comm->me;
+
       if (screen) {
 	char str[128];
-	sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d",
-		me,update->ntimestep,
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -526,7 +523,7 @@ void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,
 		   fabcd[0],fabcd[2],fabcd[3],
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h
index d26f2f87138140f64a70cc61408e4b10fa40605f..14a6c40edd39a022ff08c1bec26e9a8e1df65b48 100644
--- a/src/USER-OMP/dihedral_class2_omp.h
+++ b/src/USER-OMP/dihedral_class2_omp.h
@@ -33,13 +33,13 @@ class DihedralClass2OMP : public DihedralClass2, public ThrOMP {
 
  public:
     DihedralClass2OMP(class LAMMPS *lmp) : 
-      DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralClass2(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
index a6c027e92d7f107317388453e86cb96ffc555087..1a80e8a7cd746b5055237c05ca8e85d2e16919e4 100644
--- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
@@ -39,7 +39,6 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -91,9 +87,10 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -159,7 +156,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -172,7 +169,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -257,7 +254,7 @@ void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
index eb906ab9534b29051796f9a4017633a4a22900d7..54627c169b0a4016ec446bde9e1704102c8ad318 100644
--- a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
@@ -33,13 +33,13 @@ class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP {
 
  public:
     DihedralCosineShiftExpOMP(class LAMMPS *lmp) : 
-      DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralCosineShiftExp(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp
index 0fa24090a74a4ce11cf9f9ba2b29c25cafe84958..cdad9b6ab89fa1a41cad51388c39e6f55f535d6d 100644
--- a/src/USER-OMP/dihedral_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_harmonic_omp.cpp
@@ -39,7 +39,6 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralHarmonicOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,i,m,n,type;
@@ -90,9 +86,10 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -158,7 +155,7 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -264,7 +261,6 @@ void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
-
diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h
index 2d7bae64ee6e768423f6d67823be0c329a1b6ee7..8b8562ad90fad8592fa83a7e380b6dc0e5c5c3e3 100644
--- a/src/USER-OMP/dihedral_harmonic_omp.h
+++ b/src/USER-OMP/dihedral_harmonic_omp.h
@@ -1,4 +1,4 @@
-/* ----------------------------------------------------------------------
+/* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
@@ -33,13 +33,13 @@ class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP {
 
  public:
     DihedralHarmonicOMP(class LAMMPS *lmp) : 
-      DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp
index 4ec701a0cbe3e429a428aef6e96f2bfdb2888305..b9b61982f9f92aa0cec0d543e36e6fb4cbd268b5 100644
--- a/src/USER-OMP/dihedral_helix_omp.cpp
+++ b/src/USER-OMP/dihedral_helix_omp.cpp
@@ -42,7 +42,6 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -50,37 +49,34 @@ void DihedralHelixOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralHelixOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -94,9 +90,10 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -132,18 +129,18 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
     domain->minimum_image(vb3x,vb3y,vb3z);
     
     // c0 calculation
-        
+
     sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
     sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
     sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
-        
+
     rb1 = sqrt(sb1);
     rb3 = sqrt(sb3);
-        
+
     c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
 
     // 1st and 2nd angle
-        
+
     b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
     b1mag = sqrt(b1mag2);
     b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
@@ -181,15 +178,16 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
     cz = vb1x*vb2y - vb1y*vb2x;
     cmag = sqrt(cx*cx + cy*cy + cz*cz);
     dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
-    
+
     // error check
 
     if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
       int me = comm->me;
+
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -202,7 +200,7 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -217,7 +215,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) +
 		 cphi[type]*(1.0 + cos(phi + MY_PI4));
-;
 
     a = pd;
     c = c * a;
@@ -277,6 +274,6 @@ void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h
index 792319741345b5ce9a2e30fa38e12519b69c7963..e932045cfff06717d8a0298206b2349ca880f5e0 100644
--- a/src/USER-OMP/dihedral_helix_omp.h
+++ b/src/USER-OMP/dihedral_helix_omp.h
@@ -33,13 +33,13 @@ class DihedralHelixOMP : public DihedralHelix, public ThrOMP {
 
  public:
     DihedralHelixOMP(class LAMMPS *lmp) : 
-      DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralHelix(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
index bde958984e5e148635b31c78a0b23c5abf295a15..822ddb79650ab6c0f82f7c966417698bec10fadf 100644
--- a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
@@ -39,7 +39,6 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -47,37 +46,34 @@ void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralMultiHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -91,9 +87,10 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -180,7 +177,7 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -264,6 +261,6 @@ void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h
index da2322f03834eb79c85eb93bda5b7eb6ff0a5c0e..628ad2a6a09dc2fc9df8e8e2274420ee060f16e1 100644
--- a/src/USER-OMP/dihedral_multi_harmonic_omp.h
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h
@@ -33,13 +33,13 @@ class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP {
 
  public:
     DihedralMultiHarmonicOMP(class LAMMPS *lmp) : 
-      DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralMultiHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp
index 9f59e26d26dee23d96c3c781b35661eecb0085eb..6e46575f3b2d4f0ae00a552c8b7d1b51f9ba30c8 100644
--- a/src/USER-OMP/dihedral_opls_omp.cpp
+++ b/src/USER-OMP/dihedral_opls_omp.cpp
@@ -40,7 +40,6 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -48,37 +47,34 @@ void DihedralOPLSOMP::compute(int eflag, int vflag)
   const int inum = neighbor->ndihedrallist;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
+void DihedralOPLSOMP::eval(int nfrom, int nto, ThrData * const thr)
 {
   
   int i1,i2,i3,i4,n,type;
@@ -92,9 +88,10 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 
   edihedral = 0.0;
 
-  double **x = atom->x;
-  int **dihedrallist = neighbor->dihedrallist;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int nlocal = atom->nlocal;
 
   for (n = nfrom; n < nto; n++) {
     i1 = dihedrallist[n][0];
@@ -188,7 +185,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
       if (screen) {
 	char str[128];
 	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
-		me,tid,update->ntimestep,
+		me,thr->get_tid(),update->ntimestep,
 		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
 	error->warning(FLERR,str,0);
 	fprintf(screen,"  1st atom: %d %g %g %g\n",
@@ -201,7 +198,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 		me,x[i4][0],x[i4][1],x[i4][2]);
       }
     }
-    
+
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
 
@@ -280,7 +277,7 @@ void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
 
     if (EVFLAG)
       ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
-		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
   }
 }
 
diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h
index 58b9920538b6919ff6d93d9d0ec50e624f0661ab..44c76bb2adc2caf476bb03042ce56a49433fa879 100644
--- a/src/USER-OMP/dihedral_opls_omp.h
+++ b/src/USER-OMP/dihedral_opls_omp.h
@@ -33,13 +33,13 @@ class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP {
 
  public:
     DihedralOPLSOMP(class LAMMPS *lmp) : 
-      DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {};
+      DihedralOPLS(lmp), ThrOMP(lmp,THR_DIHEDRAL) {};
 
   virtual void compute(int, int);
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_BOND>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp
index a642b21f222c409a6e4c04f23f70d3e438d9f39d..93af055f8111d9ae5505bb0e085e9b5dcaa5299c 100644
--- a/src/USER-OMP/fix_nve_sphere_omp.cpp
+++ b/src/USER-OMP/fix_nve_sphere_omp.cpp
@@ -34,26 +34,24 @@ enum{NONE,DIPOLE};
 
 void FixNVESphereOMP::initial_integrate(int vflag)
 {
-  double **x = atom->x;
-  double **v = atom->v;
-  double **f = atom->f;
-  double **omega = atom->omega;
-  double **torque = atom->torque;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  const double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  const double * const * const torque = atom->torque;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const int * const mask = atom->mask;
+  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; // loop bound: atom->nfirst if this fix's group is atom->firstgroup, else atom->nlocal
   int i;
   
-  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
-
   // set timestep here since dt may have changed or come via rRESPA
   const double dtfrotate = dtf / INERTIA;
 
   // update v,x,omega for all particles
   // d_omega/dt = torque / inertia
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
   for (i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
@@ -77,9 +75,9 @@ void FixNVESphereOMP::initial_integrate(int vflag)
   // renormalize mu to dipole length
 
   if (extra == DIPOLE) {
-    double **mu = atom->mu;
+    double * const * const mu = atom->mu;
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
     for (i = 0; i < nlocal; i++) { 
       double g0,g1,g2,msq,scale;
@@ -103,18 +101,16 @@ void FixNVESphereOMP::initial_integrate(int vflag)
 
 void FixNVESphereOMP::final_integrate()
 {
-  double **v = atom->v;
-  double **f = atom->f;
-  double **omega = atom->omega;
-  double **torque = atom->torque;
-  double *rmass = atom->rmass;
-  double *radius = atom->radius;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  double * const * const v = atom->v;
+  const double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  const double * const * const torque = atom->torque;
+  const double * const rmass = atom->rmass;
+  const double * const radius = atom->radius;
+  const int * const mask = atom->mask;
+  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; // loop bound: atom->nfirst if this fix's group is atom->firstgroup, else atom->nlocal
   int i;
   
-  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
-
   // set timestep here since dt may have changed or come via rRESPA
 
   const double dtfrotate = dtf / INERTIA;
@@ -123,7 +119,7 @@ void FixNVESphereOMP::final_integrate()
   // d_omega/dt = torque / inertia
 
 #if defined(_OPENMP)
-#pragma omp parallel for private(i) default(shared)
+#pragma omp parallel for private(i) default(none)
 #endif
   for (i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp
index 40781cb4078812f36b28a64037596e187170b3fe..4655dd1af77a8f09ba188b8d26d992b96a475085 100644
--- a/src/USER-OMP/fix_shear_history_omp.cpp
+++ b/src/USER-OMP/fix_shear_history_omp.cpp
@@ -47,7 +47,7 @@ void FixShearHistoryOMP::pre_exchange()
 
   int flag = 0;
 #if defined(_OPENMP)
-#pragma omp parallel shared(flag)
+#pragma omp parallel default(none) shared(flag)
 #endif
   {
 
diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
index e91642e6ba61e2f9bb24a7ac8d9da7d86d3019a5..3af4a2f7cd5e0bf6dc0542afe53ab24148396d8e 100644
--- a/src/USER-OMP/pair_adp_omp.cpp
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairADPOMP::PairADPOMP(LAMMPS *lmp) :
-  PairADP(lmp), ThrOMP(lmp, PAIR)
+  PairADP(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,10 +39,10 @@ void PairADPOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
-  const int nall = atom->nlocal + atom->nghost;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
@@ -62,48 +62,39 @@ void PairADPOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, **mu_t, **lambda_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      mu_t = mu + tid*nall;
-      lambda_t = lambda + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      mu_t = mu + tid*atom->nlocal;
-      lambda_t = lambda + tid*atom->nlocal;
-    }
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair) // NOTE(review): init_adp presumably prepares this thread's rho/mu/lambda work arrays; element count is nall with newton_pair on, nlocal otherwise -- confirm in ThrData
+      thr->init_adp(nall, rho, mu, lambda);
+    else
+      thr->init_adp(nlocal, rho, mu, lambda);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, 
-		      double **lambda_t, int iifrom, int iito, int tid)
+void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -117,7 +108,13 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  double * const * const mu_t = thr->get_mu();
+  double * const * const lambda_t = thr->get_lambda();
+  const int tid = thr->get_tid();
+
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
@@ -128,18 +125,6 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double));
-    memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double));
-    memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double));
-  }
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -259,8 +244,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 		  lambda[i][4]+lambda[i][5]*lambda[i][5]);
       phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])*
 	(lambda[i][0]+lambda[i][1]+lambda[i][2]);
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this,i,i,nlocal,/* newton_pair */ 1, phi, 0.0, thr);
     }
   }
 
@@ -384,7 +368,7 @@ void PairADPOMP::eval(double **f, double *rho_t, double **mu_t,
 
 	if (EFLAG) evdwl = phi;
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				     fx,fy,fz,delx,dely,delz,tid);
+				     fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -399,6 +383,6 @@ double PairADPOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
   bytes += PairADP::memory_usage();
-
+  bytes += (comm->nthreads-1) * nmax * (10*sizeof(double) + 3*sizeof(double *)); // threads beyond the first: 10 doubles + 3 pointers per nmax entry (presumably rho 1 + mu 3 + lambda 6 doubles -- confirm)
   return bytes;
 }
diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h
index f7d2509cd3c01b7a735c6675460f330aeb502671..9a7f4023fb4ce674ab222609ef78ff67f3870e64 100644
--- a/src/USER-OMP/pair_adp_omp.h
+++ b/src/USER-OMP/pair_adp_omp.h
@@ -39,8 +39,7 @@ class PairADPOMP : public PairADP, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, double **mu_t, double **lambda_t, 
-	    int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp
index c277a080c0974d764aee599a923e073e71d7bbfe..cf409f3cfc74d31d1b978d5215c158819b111382 100644
--- a/src/USER-OMP/pair_born_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) :
-  PairBornCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairBornCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairBornCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,9 +90,10 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
@@ -179,7 +175,7 @@ void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h
index d6ccbfc680b648d5989cfdc217c779b46722612e..3271c566a48361e5692aa17f6df5dab67b42f24b 100644
--- a/src/USER-OMP/pair_born_coul_long_omp.h
+++ b/src/USER-OMP/pair_born_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp
index c39d205c972fa834760ebdff2358d0ba963d611c..d9dbf0d29e34d8a2c5cf5904165e8a85d06bc6b6 100644
--- a/src/USER-OMP/pair_born_omp.cpp
+++ b/src/USER-OMP/pair_born_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBornOMP::PairBornOMP(LAMMPS *lmp) :
-  PairBorn(lmp), ThrOMP(lmp, PAIR)
+  PairBorn(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBornOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairBornOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,7 +79,8 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
@@ -143,7 +139,7 @@ void PairBornOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h
index b24de4a57778f1ddaa3bb030acb12df6947372cb..726064472844e577465d5c35cff4994e3579f476 100644
--- a/src/USER-OMP/pair_born_omp.h
+++ b/src/USER-OMP/pair_born_omp.h
@@ -39,7 +39,7 @@ class PairBornOMP : public PairBorn, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
index ac47d478a03f692cba6fd61da3e702b44677105f..235f1c4f2cb104f19ad68d8b046cb970be63c136 100644
--- a/src/USER-OMP/pair_buck_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) :
-  PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,35 @@ void PairBuckCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
-
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -86,8 +80,9 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
@@ -162,7 +157,7 @@ void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h
index a77f3bad24e13fdaef7564b0c2961d8db0cc083e..8fee0808c0007d2b10c91633d5a521f503f1cf9f 100644
--- a/src/USER-OMP/pair_buck_coul_cut_omp.h
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp
index 6e7398ca449b3d8ab666313c30e221a34f5ecb88..083b9acc6ed3fc91f0265028179ea683accd65eb 100644
--- a/src/USER-OMP/pair_buck_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) :
-  PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,37 @@ void PairBuckCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
     // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,8 +91,9 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
@@ -178,7 +175,7 @@ void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h
index 2c87904de8eaee46f2689eb63b54f437630f4f98..a47e809eec074c709f5a4c28527f30a4aaf6ca9b 100644
--- a/src/USER-OMP/pair_buck_coul_long_omp.h
+++ b/src/USER-OMP/pair_buck_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp
index bd171f628a48f0ab8d9bb212a31d8081fd2dbef2..97299feeeb958eb6f35649a14fbf6b6a58b0cda1 100644
--- a/src/USER-OMP/pair_buck_coul_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_omp.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) :
-  PairBuckCoul(lmp), ThrOMP(lmp, PAIR)
+  PairBuckCoul(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -45,7 +45,6 @@ void PairBuckCoulOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -53,53 +52,50 @@ void PairBuckCoulOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
-  double *x0 = x[0];
+  const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
   int *ilist = list->ilist;
@@ -129,7 +125,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       ni = sbmask(j);
       j &= NEIGHMASK;
       
-      { register double *xj = x0+(j+(j<<1));
+      { const register double *xj = x0+(j+(j<<1));
 	d[0] = xi[0] - xj[0];				// pair vector
 	d[1] = xi[1] - xj[1];
 	d[2] = xi[2] - xj[2]; }
@@ -214,7 +210,7 @@ void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h
index dbff9b419a7ca0ca5bb8bf3cc340d2d921c7648c..823f64a4aba1a79425d3173b71989264d65f6c05 100644
--- a/src/USER-OMP/pair_buck_coul_omp.h
+++ b/src/USER-OMP/pair_buck_coul_omp.h
@@ -39,7 +39,7 @@ class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp
index 66d8730abd72bef72da6e3092400382bb9432a55..5806a3e796884ffcf0fc92112be8eae7fd92d09f 100644
--- a/src/USER-OMP/pair_buck_omp.cpp
+++ b/src/USER-OMP/pair_buck_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairBuckOMP::PairBuckOMP(LAMMPS *lmp) :
-  PairBuck(lmp), ThrOMP(lmp, PAIR)
+  PairBuck(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairBuckOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairBuckOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,7 +79,8 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
@@ -145,7 +141,7 @@ void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h
index 40b6702e6f0da006d1ccbdd2a1eb1e14550a4457..c73e3f0d08c81d498712cbe3964b7f46bd2d7fed 100644
--- a/src/USER-OMP/pair_buck_omp.h
+++ b/src/USER-OMP/pair_buck_omp.h
@@ -39,7 +39,7 @@ class PairBuckOMP : public PairBuck, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp
index 01bd5f6eaa36b4eaa93b935236acaf4a431c5564..287b39ceb1f67fa214197b738e5486c0f152a2aa 100644
--- a/src/USER-OMP/pair_cdeam_omp.cpp
+++ b/src/USER-OMP/pair_cdeam_omp.cpp
@@ -44,7 +44,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) :
-  PairCDEAM(lmp,_cdeamVersion), PairEAM(lmp), ThrOMP(lmp, PAIR)
+  PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -55,7 +55,6 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -78,22 +77,19 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, *rhoB_t, *D_values_t;
-
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      rhoB_t = rhoB + tid*nall;
-      D_values_t = D_values + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      rhoB_t = rhoB + tid*atom->nlocal;
-      D_values_t = D_values + tid*atom->nlocal;
-    }
+ 
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair)
+      thr->init_cdeam(nall, rho, rhoB, D_values);
+    else
+      thr->init_cdeam(atom->nlocal, rho, rhoB, D_values);
 
     switch (cdeamVersion) {
 
@@ -101,15 +97,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
   
       if (evflag) {
 	if (eflag) {
-	  if (force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0,1>(ifrom, ito, thr);
 	} else {
-	  if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr);
+	  else eval<1,0,0,1>(ifrom, ito, thr);
 	}
       } else {
-	if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr);
+	else eval<0,0,0,1>(ifrom, ito, thr);
       }
       break;
 
@@ -117,15 +113,15 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
 
       if (evflag) {
 	if (eflag) {
-	  if (force->newton_pair) eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr);
+	  else eval<1,1,0,2>(ifrom, ito, thr);
 	} else {
-	  if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	  else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr);
+	  else eval<1,0,0,2>(ifrom, ito, thr);
 	}
       } else {
-	if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
-	else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr);
+	else eval<0,0,0,2>(ifrom, ito, thr);
       }
       break;
 
@@ -136,18 +132,12 @@ void PairCDEAMOMP::compute(int eflag, int vflag)
     error->all(FLERR,"unsupported eam/cd pair style variant");
     }
     
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
-void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, 
-		      double *D_values_t, int iifrom, int iito, int tid)
+void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -156,10 +146,17 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  double * const rhoB_t = thr->get_rhoB();
+  double * const D_values_t = thr->get_D_values();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
 
   double fxtmp,fytmp,fztmp;
 
@@ -167,18 +164,6 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(rhoB_t, 0, nall*sizeof(double));
-    memset(D_values_t, 0, nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(rhoB_t, 0, nlocal*sizeof(double));
-    memset(D_values_t, 0, nlocal*sizeof(double));
-  }
-
   // Stage I
 
   // Compute rho and rhoB at each local atom site.
@@ -240,10 +225,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
-    data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);
+    data_reduce_thr(rhoB, nall, nthreads, 1, tid);
     if (CDEAMVERSION==1)
-      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -259,10 +244,10 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
   
   } else {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
-    data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);
+    data_reduce_thr(rhoB, nlocal, nthreads, 1, tid);
     if (CDEAMVERSION==1)
-      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -277,8 +262,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
     fp[i] = FPrimeOfRho(index, type[i]);
     if(EFLAG) {
       phi = FofRho(index, type[i]);
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
     }
   }
 
@@ -360,7 +344,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
     }
 
     if (NEWTON_PAIR) {
-      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nall, nthreads, 1, tid);
 
       // wait until reduction is complete
       sync_threads();
@@ -375,7 +359,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
       sync_threads();
   
   } else {
-      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+      data_reduce_thr(D_values, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -525,7 +509,7 @@ void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t,
 
 	if(EFLAG) evdwl = phi;
 	if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				fpair,delx,dely,delz,tid);
+				fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h
index 85b124cb171ba58401efdd5b55f658432ef213a7..46f460f8fa07144c4235bd7fde547dc99fbe7575 100644
--- a/src/USER-OMP/pair_cdeam_omp.h
+++ b/src/USER-OMP/pair_cdeam_omp.h
@@ -40,8 +40,7 @@ class PairCDEAMOMP : public PairCDEAM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
-  void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, 
-	    int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
   /// The one-site concentration formulation of CD-EAM.
diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp
index c8bc74407a1bbdd0cbc5a427123376373094ec76..7bfe1c04dea65098eae8fed7ae6458a819badfad 100644
--- a/src/USER-OMP/pair_colloid_omp.cpp
+++ b/src/USER-OMP/pair_colloid_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairColloidOMP::PairColloidOMP(LAMMPS *lmp) :
-  PairColloid(lmp), ThrOMP(lmp, PAIR)
+  PairColloid(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairColloidOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairColloidOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +81,11 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -204,7 +200,7 @@ void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
       }
 
       if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h
index a0be13cbb41fd4a63e0cf9b1a6d8d80944701d43..cde7e9b6502686ca84f7a4d26a985466341e9656 100644
--- a/src/USER-OMP/pair_colloid_omp.h
+++ b/src/USER-OMP/pair_colloid_omp.h
@@ -39,7 +39,7 @@ class PairColloidOMP : public PairColloid, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp
index bb19db3d22e9f91f5e3981b052908af6a1f9c4ea..a8473eec38f7ee2ce4c39b2bfa519d8285758924 100644
--- a/src/USER-OMP/pair_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) :
-  PairCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -86,12 +81,13 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -142,7 +138,7 @@ void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h
index eca9958ff23652ba7d36d76d9bb158ab14cc5258..3499ee4ae639bbe8a10c8e5c2659fb07478a63b1 100644
--- a/src/USER-OMP/pair_coul_cut_omp.h
+++ b/src/USER-OMP/pair_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairCoulCutOMP : public PairCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp
index 1c2e7b8e07deb62cfd30191abddd102518d2f51e..73e579262e2fa86d86e7c6a56ae263eadb391ba7 100644
--- a/src/USER-OMP/pair_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_coul_debye_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) :
-  PairCoulDebye(lmp), ThrOMP(lmp, PAIR)
+  PairCoulDebye(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairCoulDebyeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -86,12 +81,13 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -144,7 +140,7 @@ void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 	  ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h
index 7ad599bb1b70b978e24502bb148f8338d7e740b1..f016de8b5d8e8a7e29de1ddb8a6f63f8e92decf1 100644
--- a/src/USER-OMP/pair_coul_debye_omp.h
+++ b/src/USER-OMP/pair_coul_debye_omp.h
@@ -39,7 +39,7 @@ class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp
index 3a2e05159128af8c84a3cd3e40e8351a4a18f23c..82f070d37dffd357c4cbdd933487e87abe4590ae 100644
--- a/src/USER-OMP/pair_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) :
-  PairCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
@@ -96,12 +91,13 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -181,7 +177,7 @@ void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,ecoul,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h
index 7b63f762f23e0c11525d9eaa66dfbe24bf9b4ad2..d7655637d0f84cffd7880521b890741777a66c93 100644
--- a/src/USER-OMP/pair_coul_long_omp.h
+++ b/src/USER-OMP/pair_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairCoulLongOMP : public PairCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp
index 9ba93b19b56192b2f90d23fa31738f2174f1eba1..85079dd718a69dbf82d5d860c1285bf752b0ebe2 100644
--- a/src/USER-OMP/pair_dipole_cut_omp.cpp
+++ b/src/USER-OMP/pair_dipole_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) :
-  PairDipoleCut(lmp), ThrOMP(lmp, PAIR)
+  PairDipoleCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairDipoleCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,34 @@ void PairDipoleCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
@@ -90,14 +83,16 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  double **mu = atom->mu;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const q = atom->q;
+  const double * const * const mu = atom->mu;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
   ilist = list->ilist;
@@ -265,7 +260,7 @@ void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, i
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h
index 832bd4d3bec4f773b6d27595aa8a066865becd72..b175450c9f14d8692590a6b5fc7748259d55a856 100644
--- a/src/USER-OMP/pair_dipole_cut_omp.h
+++ b/src/USER-OMP/pair_dipole_cut_omp.h
@@ -39,7 +39,7 @@ class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp
index 9ebc72d41437fdec19016ea0448585d4c2b2d090..b920ff5c83f7367b7db5176a1df0221b5ec9c551 100644
--- a/src/USER-OMP/pair_dipole_sf_omp.cpp
+++ b/src/USER-OMP/pair_dipole_sf_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) :
-  PairDipoleSF(lmp), ThrOMP(lmp, PAIR)
+  PairDipoleSF(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairDipoleSFOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,34 @@ void PairDipoleSFOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
@@ -94,14 +87,16 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  double **mu = atom->mu;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const q = atom->q;
+  const double * const * const mu = atom->mu;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
   ilist = list->ilist;
@@ -297,7 +292,7 @@ void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, in
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h
index e601e2d569dc4a1b3df8bbbb6bedaadf4ddcfa8a..89c80fa78809cd1708861dfd538a89d38d23a944 100644
--- a/src/USER-OMP/pair_dipole_sf_omp.h
+++ b/src/USER-OMP/pair_dipole_sf_omp.h
@@ -39,7 +39,7 @@ class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp
index be1e32f37d96e9c15c1ddd12e87b5c5618707b81..0d24ce401dda50c8a0a44d6432c1d763ea2e4b0f 100644
--- a/src/USER-OMP/pair_dpd_omp.cpp
+++ b/src/USER-OMP/pair_dpd_omp.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDPDOMP::PairDPDOMP(LAMMPS *lmp) :
-  PairDPD(lmp), ThrOMP(lmp, PAIR)
+  PairDPD(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   random_thr = NULL;
@@ -54,7 +54,6 @@ void PairDPDOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -63,46 +62,46 @@ void PairDPDOMP::compute(int eflag, int vflag)
 
   if (!random_thr)
     random_thr = new RanMars*[nthreads];
-  
+
+  // to ensure full compatibility with the serial DPD style,
+  // we use its random number generator instance for thread 0
   random_thr[0] = random;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
+    // create a separate random number generator instance for
+    // each thread != 0. make sure we use unique seeds.
     if (random_thr && tid > 0)
       random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
 				    + comm->nprocs*tid);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -112,14 +111,15 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *special_lj = force->special_lj;
+  const double dtinvsqrt = 1.0/sqrt(update->dt);
   double fxtmp,fytmp,fztmp;
-  RanMars &rng = *random_thr[tid];
+  RanMars &rng = *random_thr[thr->get_tid()];
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -190,7 +190,7 @@ void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h
index 9385e5444f6545a68fe9dc85c5f4bb8bc0ec8122..c3802f8e6000e4607617c374036027f0fd5933e9 100644
--- a/src/USER-OMP/pair_dpd_omp.h
+++ b/src/USER-OMP/pair_dpd_omp.h
@@ -43,7 +43,7 @@ class PairDPDOMP : public PairDPD, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp
index 7e3fb8b3987cb491c2b3a6ee1d134a54aaf9587f..50a1bf439ebb6efba3bc6d1345a97ff4fd3e9aa8 100644
--- a/src/USER-OMP/pair_dpd_tstat_omp.cpp
+++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) :
-  PairDPDTstat(lmp), ThrOMP(lmp, PAIR)
+  PairDPDTstat(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   random_thr = NULL;
@@ -54,7 +54,6 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -64,45 +63,45 @@ void PairDPDTstatOMP::compute(int eflag, int vflag)
   if (!random_thr)
     random_thr = new RanMars*[nthreads];
   
+  // to ensure full compatibility with the serial DPD style,
+  // we use its random number generator instance for thread 0
   random_thr[0] = random;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
+    // create a separate random number generator instance for
+    // each thread != 0. make sure we use unique seeds.
     if (random_thr && tid > 0)
       random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
 				    + comm->nprocs*tid);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDTstatOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -112,14 +111,15 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
-  double dtinvsqrt = 1.0/sqrt(update->dt);
+  const double * const * const x = atom->x;
+  const double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double *special_lj = force->special_lj;
+  const double dtinvsqrt = 1.0/sqrt(update->dt);
   double fxtmp,fytmp,fztmp;
-  RanMars &rng = *random_thr[tid];
+  RanMars &rng = *random_thr[thr->get_tid()];
 
   // adjust sigma if target T is changing
 
@@ -192,7 +192,7 @@ void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 0.0,0.0,fpair,delx,dely,delz,tid);
+				 0.0,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h
index 14f640a9259f2a96b5c77960e69e972399de61d6..87c9de5505db70fa491e1e9a178614634f7bb319 100644
--- a/src/USER-OMP/pair_dpd_tstat_omp.h
+++ b/src/USER-OMP/pair_dpd_tstat_omp.h
@@ -43,7 +43,7 @@ class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp
index 0ae4d54fb7e0369f7840c6abf530640e825d2220..c014eb75e29dd2e3d5956b4bc59023823d7703a9 100644
--- a/src/USER-OMP/pair_eam_omp.cpp
+++ b/src/USER-OMP/pair_eam_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEAMOMP::PairEAMOMP(LAMMPS *lmp) :
-  PairEAM(lmp), ThrOMP(lmp, PAIR)
+  PairEAM(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,7 +39,6 @@ void PairEAMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -58,42 +57,39 @@ void PairEAMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
     if (force->newton_pair)
-      rho_t = rho + tid*nall;
-    else rho_t = rho + tid*atom->nlocal;
+      thr->init_eam(nall, rho);
+    else
+      thr->init_eam(atom->nlocal, rho);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairEAMOMP::eval(double **f, double *rho_t,
-		      int iifrom, int iito, int tid)
+void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -103,10 +99,15 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const rho_t = thr->get_rho();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
 
   double fxtmp,fytmp,fztmp;
 
@@ -114,11 +115,6 @@ void PairEAMOMP::eval(double **f, double *rho_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density 
-
-  if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double));
-  else memset(rho_t, 0, nlocal*sizeof(double));
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -164,7 +160,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -178,7 +174,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
     sync_threads();
   
   } else {
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);
 
     // wait until reduction is complete
     sync_threads();
@@ -198,8 +194,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
     fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
     if (EFLAG) {
       phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
     }
   }
 
@@ -283,7 +278,7 @@ void PairEAMOMP::eval(double **f, double *rho_t,
 
 	if (EFLAG) evdwl = phi;
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h
index 1184cb34bcc4236a7509dcca17ede5cd8a70357e..6b0f1274fcd9419934a7d43d35832581d0e9d40f 100644
--- a/src/USER-OMP/pair_eam_omp.h
+++ b/src/USER-OMP/pair_eam_omp.h
@@ -39,7 +39,7 @@ class PairEAMOMP : public PairEAM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp
index 65b05c8143b2f92c59fc248b2940298a340a2590..f0d6d47cecce8eb009cf75a9c0e0e1dee2aea270 100644
--- a/src/USER-OMP/pair_edip_omp.cpp
+++ b/src/USER-OMP/pair_edip_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) :
-  PairEDIP(lmp), ThrOMP(lmp, PAIR)
+  PairEDIP(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairEDIPOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = vflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,35 +43,31 @@ void PairEDIPOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
-    } else eval<0,0,0>(f, ifrom, ito, tid);
+    } else eval<0,0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairEDIPOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
@@ -133,6 +128,8 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
   double potentia3B_factor;
   double potential2B_factor;
 
+  const int tid = thr->get_tid();
+
   double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList;
   double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList;
   double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList;
@@ -141,9 +138,10 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
   double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList;
   double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   inum = list->inum;
   ilist = list->ilist;
@@ -340,7 +338,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
       evdwl = (exp2B_ij * potential2B_factor);
 
       if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0,
-			       -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+			       -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
 
       // three-body Forces
 
@@ -435,7 +433,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
 
           evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor);
 
-          if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
+          if (evflag) ev_tally3_thr(this,i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik,thr);
       }
     }
 
@@ -469,7 +467,7 @@ void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
 
         evdwl = 0.0;
         if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0,
-				 forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+				 forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h
index 55c34db345ae7406db63d8a1cf0d920ee03d1a8c..55e10c83bb58ee1d3263f821159ea3bf356f112e 100644
--- a/src/USER-OMP/pair_edip_omp.h
+++ b/src/USER-OMP/pair_edip_omp.h
@@ -34,7 +34,7 @@ class PairEDIPOMP : public PairEDIP, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp
index d31ad201207e359faa6afb2c83c9e840580b9c64..7184adb7813269953f1e63f21e7717b4a9fee791 100644
--- a/src/USER-OMP/pair_eim_omp.cpp
+++ b/src/USER-OMP/pair_eim_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairEIMOMP::PairEIMOMP(LAMMPS *lmp) :
-  PairEIM(lmp), ThrOMP(lmp, PAIR)
+  PairEIM(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -39,7 +39,6 @@ void PairEIMOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -58,46 +57,39 @@ void PairEIMOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, *rho_t, *fp_t;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    if (force->newton_pair) {
-      rho_t = rho + tid*nall;
-      fp_t = fp + tid*nall;
-    } else {
-      rho_t = rho + tid*atom->nlocal;
-      fp_t = fp + tid*atom->nlocal;
-    }
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    if (force->newton_pair)
+      thr->init_eim(nall, rho, fp);
+    else
+      thr->init_eim(atom->nlocal, rho, fp);
     
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid);
-	else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
-	else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
-      else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
-		      int iifrom, int iito, int tid)
+void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,m,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -107,10 +99,17 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
+
+  const double * const * const x = atom->x;  // positions, read-only through this pointer
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  double * const rho_t = thr->get_rho();  // was the rho_t argument
+  double * const fp_t = thr->get_fp();  // was the fp_t argument
+  const int tid = thr->get_tid();  // thread id, passed to data_reduce_thr() further down
+  const int nthreads = comm->nthreads;  // read once; used by the data_reduce_thr() calls
+
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;  // local plus ghost atoms
 
   double fxtmp,fytmp,fztmp;
 
@@ -118,16 +117,6 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
-  // zero out density and fp
-
-  if (NEWTON_PAIR) {
-    memset(rho_t, 0, nall*sizeof(double));
-    memset(fp_t, 0, nall*sizeof(double));
-  } else {
-    memset(rho_t, 0, nlocal*sizeof(double));
-    memset(fp_t, 0, nlocal*sizeof(double));
-  }
-
   // rho = density at each atom
   // loop over neighbors of my atoms
 
@@ -171,7 +160,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   // communicate and sum densities
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nall, nthreads, 1, tid);  // rho passed directly instead of &(rho[0]); nthreads is the local copy of comm->nthreads
 
     // wait until reduction is complete
     sync_threads();
@@ -185,7 +174,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     }
 
   } else {
-    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(rho, nlocal, nthreads, 1, tid);  // non-newton branch: only nlocal entries are reduced
 
     // wait until reduction is complete
     sync_threads();
@@ -243,7 +232,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
   // communicate and sum modified densities
   if (NEWTON_PAIR) {
     // reduce per thread density
-    data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(fp, nall, nthreads, 1, tid);  // fp passed directly instead of &(fp[0]); nthreads is the local copy of comm->nthreads
 
     // wait until reduction is complete
     sync_threads();
@@ -257,7 +246,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     }
 
   } else {
-    data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(fp, nlocal, nthreads, 1, tid);  // non-newton branch: only nlocal entries are reduced
 
     // wait until reduction is complete
     sync_threads();
@@ -279,8 +268,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
     itype = type[i];
     if (EFLAG) {
       phi = 0.5*rho[i]*fp[i];
-      if (eflag_global) eng_vdwl_thr[tid] += phi;
-      if (eflag_atom) eatom_thr[tid][i] += phi;
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);  // replaces the direct eng_vdwl_thr/eatom_thr updates; atom i is passed as both indices
     }
   }
 
@@ -345,7 +333,7 @@ void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
 
 	if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul;
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);  // last argument is now the ThrData pointer rather than the thread id
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h
index 3693492e09158492c302d0a71a21c0a4036dbeab..ad273e28eb480c5f6f38ab80f70b2b9f96c6a910 100644
--- a/src/USER-OMP/pair_eim_omp.h
+++ b/src/USER-OMP/pair_eim_omp.h
@@ -39,7 +39,7 @@ class PairEIMOMP : public PairEIM, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid);
+  void eval(int iifrom, int iito, ThrData * const thr);  // f, rho_t, fp_t and tid are no longer parameters; the definition reads them from thr
 };
 
 }
diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp
index e8b255d0b78ae9641f3ff27f13eee61df8fad1cc..4f26670715d86d0b635c7063340554d59880a1ff 100644
--- a/src/USER-OMP/pair_gauss_omp.cpp
+++ b/src/USER-OMP/pair_gauss_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGaussOMP::PairGaussOMP(LAMMPS *lmp) :
-  PairGauss(lmp), ThrOMP(lmp, PAIR)
+  PairGauss(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
 }
@@ -37,46 +37,44 @@ void PairGaussOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
+  double occ = 0.0;  // combined across threads by the reduction(+:occ) clause below
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region; replaces ev_setup_thr(this) above
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr);  // template args are <EVFLAG,EFLAG,NEWTON_PAIR>; result goes to this thread's occ
+	else occ = eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr);
+	else occ = eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr);
+      else occ = eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed data_reduce_thr/ev_reduce_thr/virial_fdotr_compute calls (presumably equivalent -- confirm)
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
+  if (eflag_global) pvector[0] = occ;  // stored once, after the parallel region, from the reduced value
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
+double PairGaussOMP::eval(int iifrom, int iito, ThrData * const thr)  // now returns a double that compute() assigns to occ
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +84,11 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;  // positions, read-only through this pointer
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;  // read-only through this pointer
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -149,14 +148,14 @@ void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);  // last argument is now the ThrData pointer rather than the thread id
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
-  if (eflag_global) pvector[0] = occ;
+  return occ;  // hand the count back to the caller instead of writing the shared pvector[0] here
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h
index 7f8fc9a85b091c31415f9e28657d772e21ccaf34..81d9d0ce3f2484847b2cdff9685fd18576c16fcc 100644
--- a/src/USER-OMP/pair_gauss_omp.h
+++ b/src/USER-OMP/pair_gauss_omp.h
@@ -39,7 +39,7 @@ class PairGaussOMP : public PairGauss, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  double eval(int ifrom, int ito, ThrData * const thr);  // return type changed from void to double; f and tid are no longer parameters
 };
 
 }
diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp
index ff115e8ef775a01a22c08fa5159b0c14a91ded23..d8ec6c9b3236939f20df2867f698426b60530e4f 100644
--- a/src/USER-OMP/pair_gayberne_omp.cpp
+++ b/src/USER-OMP/pair_gayberne_omp.cpp
@@ -27,7 +27,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) :
-  PairGayBerne(lmp), ThrOMP(lmp, PAIR)
+  PairGayBerne(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
 }
@@ -38,7 +38,6 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -46,40 +45,34 @@ void PairGayBerneOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);  // template args are <EVFLAG,EFLAG,NEWTON_PAIR>
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed force/torque data_reduce_thr, ev_reduce_thr and virial_fdotr_compute calls (presumably equivalent -- confirm)
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+void PairGayBerneOMP::eval(int iifrom, int iito, ThrData * const thr)  // f and tor are now fetched from thr inside the body
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
@@ -88,11 +81,13 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
   int *ilist,*jlist,*numneigh,**firstneigh;
   double *iquat,*jquat;
 
-  double **x = atom->x;
-  int *ellipsoid = atom->ellipsoid;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;  // positions, read-only through this pointer
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  double * const * const tor = thr->get_torque();  // torque array now obtained from ThrData (was the tor argument)
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  const int * const ellipsoid = atom->ellipsoid;  // used below to index bonus[]
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
 
@@ -108,6 +103,7 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 
     i = ilist[ii];
     itype = type[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;  // zero the force/torque accumulators for atom i; NOTE(review): their declaration is not in this hunk -- confirm it exists
 
     if (form[itype][itype] == ELLIPSE_ELLIPSE) {
       iquat = bonus[ellipsoid[i]].quat;
@@ -187,12 +183,12 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 	ttor[1] *= factor_lj;
 	ttor[2] *= factor_lj;
 
-        f[i][0] += fforce[0];
-	f[i][1] += fforce[1];
-	f[i][2] += fforce[2];
-        tor[i][0] += ttor[0];
-	tor[i][1] += ttor[1];
-	tor[i][2] += ttor[2];
+        fxtmp += fforce[0];  // accumulate in locals; f[i] is updated once after the neighbor loop
+	fytmp += fforce[1];
+	fztmp += fforce[2];
+        t1tmp += ttor[0];  // likewise for the torque on atom i
+	t2tmp += ttor[1];
+	t3tmp += ttor[2];
 
         if (NEWTON_PAIR || j < nlocal) {
           rtor[0] *= factor_lj;
@@ -210,9 +206,15 @@ void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int t
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
 				     evdwl,0.0,fforce[0],fforce[1],fforce[2],
-				     -r12[0],-r12[1],-r12[2],tid);
+				     -r12[0],-r12[1],-r12[2],thr);  // last argument is now the ThrData pointer rather than the thread id
       }
     }
+    f[i][0] += fxtmp;  // apply the force accumulated for atom i in one update per component
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    tor[i][0] += t1tmp;  // same for the accumulated torque
+    tor[i][1] += t2tmp;
+    tor[i][2] += t3tmp;
   }
 }
 
diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h
index 737b4ec67de0868f4ab394e21683852b59f5ed9d..0bd0b8b086464597e51d1f18fb557a16046d1238 100644
--- a/src/USER-OMP/pair_gayberne_omp.h
+++ b/src/USER-OMP/pair_gayberne_omp.h
@@ -39,7 +39,7 @@ class PairGayBerneOMP : public PairGayBerne, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);  // f, torque and tid are no longer parameters; the definition reads force/torque from thr
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
index 1866833afed41263a14967426188acb1cb9f208b..23b8b8f5c20c99bf720b057772fb1b0e5b1aa01a 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) :
-  PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR)
+  PairGranHertzHistory(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
@@ -47,35 +46,29 @@ void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region
 
     if (evflag)
-      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<1,1>(ifrom, ito, thr);  // template args are <EVFLAG,SHEARUPDATE>
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed force/torque data_reduce_thr and ev_reduce_thr calls (presumably equivalent -- confirm)
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
   laststep = update->ntimestep;
 }
 
 template <int EVFLAG, int SHEARUPDATE>
-void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)  // f and torque are now fetched from thr inside the body
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
@@ -90,15 +83,17 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int
   int *touch,**firsttouch;
   double *shear,*allshear,**firstshear;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;  // per-atom inputs below are read-only through these pointers
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  double * const * const torque = thr->get_torque();  // torque array now obtained from ThrData (was the torque argument)
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -274,7 +269,7 @@ void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);  // last argument is now the ThrData pointer rather than the thread id
 
       }
     }
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h
index 66d7bc0fa5e7c594887d10d663ed81c3efe5acf8..956e057093f0acf6b87b814a8adfae901e64a1c2 100644
--- a/src/USER-OMP/pair_gran_hertz_history_omp.h
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.h
@@ -39,7 +39,7 @@ class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP {
 
  private:
   template <int EVFLAG, int SHEARUPDATE>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);  // f, torque and tid are no longer parameters; the definition reads force/torque from thr
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
index ad0537b516a437895857176dbd6508a095c6bb12..5212b30ce296883cf605463a856d6ba5c8c753ec 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) :
-  PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR)
+  PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
   // trigger use of OpenMP version of FixShearHistory
@@ -42,7 +42,6 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
@@ -52,38 +51,33 @@ void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region
 
     if (evflag)
-      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<1,1>(ifrom, ito, thr);  // template args are <EVFLAG,SHEARUPDATE>
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (shearupdate) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed force/torque data_reduce_thr and ev_reduce_thr calls (presumably equivalent -- confirm)
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-
   laststep = update->ntimestep;
 }
 
 template <int EVFLAG, int SHEARUPDATE>
-void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr)  // f and torque are now fetched from thr inside the body
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
+  double myshear[3];  // local working copy of one neighbor's three shear values
   double radi,radj,radsum,rsq,r,rinv,rsqinv;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
   double wr1,wr2,wr3;
@@ -95,15 +89,17 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
   int *touch,**firsttouch;
   double *shear,*allshear,**firstshear;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;  // per-atom inputs below are read-only through these pointers
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  double * const * const torque = thr->get_torque();  // torque array now obtained from ThrData (was the torque argument)
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -144,10 +140,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	// unset non-touching neighbors
 
         touch[jj] = 0;
-	shear = &allshear[3*jj];
-        shear[0] = 0.0;
-        shear[1] = 0.0;
-        shear[2] = 0.0;
+        myshear[0] = 0.0;  // zero the local copy; it is copied into allshear by the memcpy near the end of the jj loop body
+        myshear[1] = 0.0;
+        myshear[2] = 0.0;
 
       } else {
 	r = sqrt(rsq);
@@ -186,7 +181,6 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	  if (mask[i] & freeze_group_bit) meff = rmass[j];
 	  if (mask[j] & freeze_group_bit) meff = rmass[i];
 	} else {
-	  itype = type[i];
 	  jtype = type[j];
 	  meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]);
 	  if (mask[i] & freeze_group_bit) meff = mass[jtype];
@@ -207,31 +201,31 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	// shear history effects
 
 	touch[jj] = 1;
-	shear = &allshear[3*jj];
+	memcpy(myshear,allshear + 3*jj, 3*sizeof(double));  // load this neighbor's stored shear into the local buffer; NOTE(review): confirm <string.h>/<cstring> is included
 
 	if (SHEARUPDATE) {
-	  shear[0] += vtr1*dt;
-	  shear[1] += vtr2*dt;
-	  shear[2] += vtr3*dt;
+	  myshear[0] += vtr1*dt;  // all updates from here on act on the local copy, not on allshear
+	  myshear[1] += vtr2*dt;
+	  myshear[2] += vtr3*dt;
 	}
-        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
-		      shear[2]*shear[2]);
+        shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] +
+		      myshear[2]*myshear[2]);  // magnitude of the (possibly updated) shear vector
 
 	// rotate shear displacements
 
-	rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+	rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz;  // dot product of shear with (delx,dely,delz)
 	rsht *= rsqinv;
 	if (SHEARUPDATE) {
-	  shear[0] -= rsht*delx;
-	  shear[1] -= rsht*dely;
-	  shear[2] -= rsht*delz;
+	  myshear[0] -= rsht*delx;  // subtract the component along (delx,dely,delz)
+	  myshear[1] -= rsht*dely;
+	  myshear[2] -= rsht*delz;
 	}
 
 	// tangential forces = shear + tangential velocity damping
 
-	fs1 = - (kt*shear[0] + meff*gammat*vtr1);
-	fs2 = - (kt*shear[1] + meff*gammat*vtr2);
-	fs3 = - (kt*shear[2] + meff*gammat*vtr3);
+	fs1 = - (kt*myshear[0] + meff*gammat*vtr1);
+	fs2 = - (kt*myshear[1] + meff*gammat*vtr2);
+	fs3 = - (kt*myshear[2] + meff*gammat*vtr3);
 
 	// rescale frictional displacements and forces if needed
 
@@ -242,9 +236,9 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	  if (shrmag != 0.0) {
 	    const double fnfs = fn/fs;
 	    const double mgkt = meff*gammat/kt;
-	    shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1;
-	    shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2;
-	    shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3;
+	    myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1;  // same rescale as before, applied to the local copy
+	    myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2;
+	    myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3;
 	    fs1 *= fnfs;
 	    fs2 *= fnfs;
 	    fs3 *= fnfs;
@@ -277,9 +271,10 @@ void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);  // last argument is now the ThrData pointer rather than the thread id
 
       }
+      memcpy(allshear + 3*jj, myshear, 3*sizeof(double));  // store the local shear copy back for neighbor jj; appears to run for both the touching and non-touching branch -- confirm brace nesting
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h
index 33325025fcce2c14628e926f01516b7637ddb654..7588469e744408815af12be7f0e1b97ac24d54a2 100644
--- a/src/USER-OMP/pair_gran_hooke_history_omp.h
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.h
@@ -39,7 +39,7 @@ class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP {
 
  private:
   template <int EVFLAG, int SHEARUPDATE>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);  // f, torque and tid are no longer parameters; the definition reads force/torque from thr
 };
 
 }
diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp
index d6991fa453e25ef8a79c68483c0129fc8b38c76a..fda9295b7078a82f105aade025b0fc3b2769a7c1 100644
--- a/src/USER-OMP/pair_gran_hooke_omp.cpp
+++ b/src/USER-OMP/pair_gran_hooke_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) :
-  PairGranHooke(lmp), ThrOMP(lmp, PAIR)
+  PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,33 +43,28 @@ void PairGranHookeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region
 
     if (evflag)
-      if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid);
-      else eval<1,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<1,1>(ifrom, ito, thr);  // template args are <EVFLAG,NEWTON_PAIR>
+      else eval<1,0>(ifrom, ito, thr);
     else 
-      if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces and torque into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed force/torque data_reduce_thr and ev_reduce_thr calls (presumably equivalent -- confirm)
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
 }
 
 template <int EVFLAG, int NEWTON_PAIR>
-void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr)  // f and torque are now fetched from thr inside the body
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
@@ -82,15 +76,17 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i
   double fn,fs,ft,fs1,fs2,fs3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  double **x = atom->x;
-  double **v = atom->v;
-  double **omega = atom->omega;
-  double *radius = atom->radius;
-  double *rmass = atom->rmass;
-  double *mass = atom->mass;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;  // per-atom inputs below are read-only through these pointers
+  const double * const * const v = atom->v;
+  const double * const * const omega = atom->omega;
+  const double * const radius = atom->radius;
+  const double * const rmass = atom->rmass;
+  const double * const mass = atom->mass;
+  double * const * const f = thr->get_f();  // force array now obtained from ThrData (was the f argument)
+  double * const * const torque = thr->get_torque();  // torque array now obtained from ThrData (was the torque argument)
+  const int * const type = atom->type;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
   double t1tmp,t2tmp,t3tmp;
 
@@ -216,7 +212,7 @@ void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, i
 	}
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
-				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,thr);  // last argument is now the ThrData pointer rather than the thread id
 
       }
     }
diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h
index f2b093778c3f675b376b3cdc8f93d0ed9cbbf555..b275992bfacd096687147ec575bcf43596f88003 100644
--- a/src/USER-OMP/pair_gran_hooke_omp.h
+++ b/src/USER-OMP/pair_gran_hooke_omp.h
@@ -39,7 +39,7 @@ class PairGranHookeOMP : public PairGranHooke, public ThrOMP {
 
  private:
   template <int EVFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);  // f, torque and tid are no longer parameters; the definition reads force/torque from thr
 };
 
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
index 012fd596b3998d4e13c8b31bb05c908dc3cfad56..5da3f2bdfa4c9e1a2d4fc42884856cda760512ce 100644
--- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
@@ -31,7 +31,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) :
-  PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR)
+  PairHbondDreidingLJ(lmp), ThrOMP(lmp, THR_PAIR)  // style constant is now spelled THR_PAIR (was PAIR)
 {
   respa_enable = 0;
   hbcount_thr = hbeng_thr = NULL;
@@ -54,7 +54,6 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -72,35 +71,31 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);  // presumably fills in ifrom, ito and tid -- confirm in ThrOMP
+    ThrData *thr = fix->get_thr(tid);  // data object for this thread id
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);  // called by every thread inside the region
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);  // template args are <EVFLAG,EFLAG,NEWTON_PAIR>
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);  // stands in for the removed data_reduce_thr/ev_reduce_thr/virial_fdotr_compute calls (presumably equivalent -- confirm)
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
-
   // reduce per thread hbond data
   if (eflag_global) {
     pvector[0] = 0.0;
@@ -113,25 +108,26 @@ void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,evdwl,eng_lj;
   double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r2inv,r10inv;
   double switch1,switch2;
-  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  int *ilist,*jlist,*numneigh,**firstneigh;
   Param *pm;
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int **special = atom->special;
-  int **nspecial = atom->nspecial;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const double * const special_lj = force->special_lj;
+  const int * const * const nspecial = atom->nspecial;
+  const int * const * const special = atom->special;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -152,8 +148,8 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
     itype = type[i];
     if (!donor[itype]) continue;
 
-    klist = special[i];
-    knum = nspecial[i][0];
+    const int * const klist = special[i];
+    const int knum = nspecial[i][0];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
@@ -270,7 +266,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
 
 	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
-	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr);
 	    if (EFLAG) {
 	      hbcount++;
 	      hbeng += evdwl;
@@ -283,6 +279,7 @@ void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
+  const int tid = thr->get_tid();
   hbcount_thr[tid] = static_cast<double>(hbcount);
   hbeng_thr[tid] = hbeng;
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
index 1aef78490c2173fdb70a357317cb4a962641f7aa..937391684967734441ea04df84e3782b5f41d55b 100644
--- a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
@@ -43,7 +43,7 @@ class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
index b6c966f8c7d53dcf32325e971f40972f06a23220..bce4efdd3a00a6a7dbda1ec703b95af0fbba6cc7 100644
--- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
@@ -31,7 +31,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) :
-  PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR)
+  PairHbondDreidingMorse(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
   hbcount_thr = hbeng_thr = NULL;
@@ -54,7 +54,6 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -72,35 +71,31 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
-
   // reduce per thread hbond data
   if (eflag_global) {
     pvector[0] = 0.0;
@@ -113,24 +108,25 @@ void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
-  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,evdwl;
   double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r,dr,dexp,eng_morse,switch1,switch2;
-  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  int *ilist,*jlist,*numneigh,**firstneigh;
   Param *pm;
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int **special = atom->special;
-  int **nspecial = atom->nspecial;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const double * const special_lj = force->special_lj;
+  const int * const * const nspecial = atom->nspecial;
+  const int * const * const special = atom->special;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -151,8 +147,8 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
     itype = type[i];
     if (!donor[itype]) continue;
 
-    klist = special[i];
-    knum = nspecial[i][0];
+    const int * const klist = special[i];
+    const int knum = nspecial[i][0];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
@@ -268,7 +264,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 
 	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
-	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr);
 	    if (EFLAG) {
 	      hbcount++;
 	      hbeng += evdwl;
@@ -281,6 +277,7 @@ void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
+  const int tid = thr->get_tid();
   hbcount_thr[tid] = static_cast<double>(hbcount);
   hbeng_thr[tid] = hbeng;
 }
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
index 2a13c618c6eb92ec07844f84a5e46c18e660ee0b..d2edd7281b63d7da1800623e46b6792453f30e3c 100644
--- a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
@@ -43,7 +43,7 @@ class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp
index f0998363e1d0140c75e57e75d14e54da9dc44c30..68733c10939b3b2b3b31ead51e7b8379479f2d70 100644
--- a/src/USER-OMP/pair_lj96_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj96_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) :
-  PairLJ96Cut(lmp), ThrOMP(lmp, PAIR)
+  PairLJ96Cut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJ96CutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJ96CutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h
index 333212303daed1bf9ea9cb76f8b5d7a9658eb32e..a8040320c8a01f63256ba693665204c261191608 100644
--- a/src/USER-OMP/pair_lj96_cut_omp.h
+++ b/src/USER-OMP/pair_lj96_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
index 32ad05acdacea51a7e35a0b5435ff2aa19d1dd76..edfbe1f527c2c7e3ea7b83404cd567831a24a23f 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmImplicitOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
-  double invdenom_coul,invdenom_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
-  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
+  const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
+  const double invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
 
   // loop over neighbors of my atoms
 
@@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, i
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
index ba016d7d3dcded9f308ed37ea17baefeffe3ad6d..dff01ce499b724a3143a3553c2d5e0c37e890834 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit,
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
index 6dac7a17f6496f224ff4aced8043be99a980a664..efdcc995dad9ac4ddb132678b59b1b7c35901672 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,64 +43,60 @@ void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
-  double invdenom_coul,invdenom_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
-  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
-  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
+  const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
+  const double invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
 
   // loop over neighbors of my atoms
 
@@ -193,7 +188,7 @@ void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
index f2889b05fea51939c15c211459b7a78702c89b3b..0eda030ebd866631a574ecaf468bd8c4b6f56ca3 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
index c99f27f2e12a4867c47c239ea7baa9b1f06ed7e5..f9f32ea119fd5ebefed01f6b72db7f54d8039435 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) :
-  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -97,13 +92,14 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -214,7 +210,7 @@ void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
index b14e4c1fe48f8c956e76d4daa2040fd1ab2e0c60..91b9c01c1ab4de7aeac310d3866bbcb174a83d88 100644
--- a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
index 032188279321e135718371aa9e3dc4cf6b1ec718..e54c348e64d365aa72390f590f8fa58209715d5c 100644
--- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) :
-  PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2CoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -163,9 +159,9 @@ void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
-	
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
index 5fe4895691603381ce33e167f2709c275fab86bc..b22a29aa184038007f0b1666b90a316d9c4cafc1 100644
--- a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
index 84d26ceb147dfde49aef26a14e0af75d652f8df5..20ad947d23f93cd01009e08b5ec33346236c7d7b 100644
--- a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) :
-  PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -95,13 +90,14 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -181,7 +177,7 @@ void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
index da4ac3680f6262f04bd5f85e83ec2ce5d50f552c..b32799bf8494ae128a65ea6448e7fd3c08ee59c0 100644
--- a/src/USER-OMP/pair_lj_class2_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp
index 4f5d2550fc5862d6eeeadcc73fed23d2ae02e938..cff80d3f1d5de18acddcebbefea65204b6621c0c 100644
--- a/src/USER-OMP/pair_lj_class2_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) :
-  PairLJClass2(lmp), ThrOMP(lmp, PAIR)
+  PairLJClass2(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJClass2OMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2OMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h
index cfe24bb714fb37a94436e2ec5099ffabaf579da6..317c7376c5210c62d7d93ab5a1e65cd702f82923 100644
--- a/src/USER-OMP/pair_lj_class2_omp.h
+++ b/src/USER-OMP/pair_lj_class2_omp.h
@@ -39,7 +39,7 @@ class PairLJClass2OMP : public PairLJClass2, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp
index 23e2a8d906372f36dc8b8a2690421e4432fac1ec..ae15087ba90f9aec9934dafbdf0d3175f9a88da5 100644
--- a/src/USER-OMP/pair_lj_coul_omp.cpp
+++ b/src/USER-OMP/pair_lj_coul_omp.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) :
-  PairLJCoul(lmp), ThrOMP(lmp, PAIR)
+  PairLJCoul(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -45,7 +45,6 @@ void PairLJCoulOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -53,53 +52,50 @@ void PairLJCoulOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCoulOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
 
-  double *x0 = x[0];
+  const double *x0 = x[0];
   double *f0 = f[0], *fi = f0;
 
   int *ilist = list->ilist;
@@ -127,7 +123,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       ni = sbmask(j);
       j &= NEIGHMASK;
       
-      { register double *xj = x0+(j+(j<<1));
+      { const double *xj = x0+(j+(j<<1));	// x0 + 3*j, read-only
 	d[0] = xi[0] - xj[0];				// pair vector
 	d[1] = xi[1] - xj[1];
 	d[2] = xi[2] - xj[2]; }
@@ -218,7 +214,7 @@ void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)
       }
       
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],thr);
     }
   }
 }
diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h
index 619e609ba8c50cf634396155024a1f8bcd833c9e..e2259e16a0a031c1fcb63e1612aa28d7920ccadc 100644
--- a/src/USER-OMP/pair_lj_coul_omp.h
+++ b/src/USER-OMP/pair_lj_coul_omp.h
@@ -39,7 +39,7 @@ class PairLJCoulOMP : public PairLJCoul, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp
index 4f806bd71fe2798e411091901976de51366d9195..09e44a910776012151aab4132dd9482005bf9e32 100644
--- a/src/USER-OMP/pair_lj_cubic_omp.cpp
+++ b/src/USER-OMP/pair_lj_cubic_omp.cpp
@@ -26,7 +26,7 @@ using namespace PairLJCubicConstants;
 /* ---------------------------------------------------------------------- */
 
 PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) :
-  PairLJCubic(lmp), ThrOMP(lmp, PAIR)
+  PairLJCubic(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairLJCubicOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCubicOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -86,10 +81,11 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -152,8 +148,8 @@ void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h
index 559a6125ab3719ec1c5537eaa1174b46712a21f1..a6ed7d2b97db9ce3cc23568f7f3a0e43c2549362 100644
--- a/src/USER-OMP/pair_lj_cubic_omp.h
+++ b/src/USER-OMP/pair_lj_cubic_omp.h
@@ -39,7 +39,7 @@ class PairLJCubicOMP : public PairLJCubic, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
index be98ec38fc89b4b73475495feff9faba852d34b9..46114ce6131e12359d61f5cc314eb088e73706b2 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) :
-  PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJCutCoulCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -86,13 +81,14 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -159,11 +155,11 @@ void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
 	      offset[itype][jtype];
 	    evdwl *= factor_lj;
-	  }
-	} else evdwl = 0.0;
+	  } else evdwl = 0.0;
+	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
index c8c34e2591780be3b32f2555571ef7fc073825c2..3d4be420e71cb9809a79aa0ad5edda22010ab326 100644
--- a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
index 13a4a1906f9b6218f582bcc08d82a542889125da..9d96f31dba5bcb25c8e331472e661afcf88c4d59 100644
--- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) :
-  PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulDebye(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -129,7 +125,6 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 
-
 	if (rsq < cut_coulsq[itype][jtype]) {
 	  r = sqrt(rsq);
 	  rinv = 1.0/r;
@@ -165,8 +160,9 @@ void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
index 00cf540be22cb59f2aeb2647f8286b34141967bd..e2205cb7cef1f696866704f3e772ae018c5ac014 100644
--- a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
index 1d8f977c9635d3dff671628957716ae629bec5e3..79976bf8a8eadfcdd1880c692ef28c69334e6bf1 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) :
-  PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -44,7 +44,6 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -52,40 +51,36 @@ void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -96,13 +91,14 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -198,9 +194,9 @@ void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 	    evdwl *= factor_lj;
 	  } else evdwl = 0.0;
 	}
-	
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
index ac408ba886e86cd7dd7514d424950462fa9075be..a907959ae3b518b598a50b41eab9a9128f986303 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
@@ -39,7 +39,7 @@ class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
index 6ada944c53b41718eb967a81eea70d08b6217458..78f35709a24c399c277f256c0df4f9c23eccf7bc 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
@@ -36,7 +36,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) :
-  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR)
+  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 
@@ -61,7 +61,6 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nlocal = atom->nlocal;
@@ -76,8 +75,8 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
   }
 
   // cache corrected M positions in mpos[]
-  double **x = atom->x;
-  int *type = atom->type;
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
   for (int i = 0; i < nlocal; i++) {
     if (type[i] == typeO) {
       find_M(i,h1idx[i],h2idx[i],mpos[i]);
@@ -101,39 +100,35 @@ void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      eval<0,0,0>(f, ifrom, ito, tid);
+      eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int VFLAG>
-void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulLongTIP4POMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   int n,vlist[6];
@@ -151,13 +146,14 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -216,7 +212,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 	  } else evdwl = 0.0;
 
 	  if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
-				   evdwl,0.0,forcelj,delx,dely,delz,tid);
+				   evdwl,0.0,forcelj,delx,dely,delz,thr);
 	}
 
 	// adjust rsq and delxyz for off-site O charge(s)
@@ -423,7 +419,7 @@ void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
 	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
 	  } else ecoul = 0.0;
 
-	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid);
+	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr);
 	}
       }
     }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
index 093fc0216b39c0b888f85d1c8ff00e057d40fdcc..ff49bdcedb06fbf8e6b1c445ca96ee8da301d564 100644
--- a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
@@ -39,7 +39,6 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
   virtual double memory_usage();
 
  protected:
-
   // this is to cache m-shift corrected positions.
   int maxmpos;        // size of the following arrays
   int *h1idx, *h2idx; // local index of hydrogen atoms
@@ -48,7 +47,7 @@ class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp
index 3d82149fece935542ed9f1b539c1f258e29108ac..4932a784bb6857af603ce7113c45f6fef251f370 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+  PairLJCut(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJCutOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -139,8 +135,8 @@ void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_cut_omp.h b/src/USER-OMP/pair_lj_cut_omp.h
index 56f9f9b8a58e4005d68f0975c563d45128f27920..f97996e4807f2165407c8acd0ec93aec3ae8834d 100644
--- a/src/USER-OMP/pair_lj_cut_omp.h
+++ b/src/USER-OMP/pair_lj_cut_omp.h
@@ -39,7 +39,7 @@ class PairLJCutOMP : public PairLJCut, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp
index 7b06503ee4a63c185fe181c38efc004b29ac0d8c..4f93d3bd4201d93c3c079ede7a983495cec1a2d2 100644
--- a/src/USER-OMP/pair_lj_expand_omp.cpp
+++ b/src/USER-OMP/pair_lj_expand_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) :
-  PairLJExpand(lmp), ThrOMP(lmp, PAIR)
+  PairLJExpand(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJExpandOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJExpandOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -143,8 +139,8 @@ void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h
index 29488deae88a5117e6afbd1aa3105493ccde9b8e..9ff8d3080a7ca516cbbfcb5b95e830b863f07551 100644
--- a/src/USER-OMP/pair_lj_expand_omp.h
+++ b/src/USER-OMP/pair_lj_expand_omp.h
@@ -39,7 +39,7 @@ class PairLJExpandOMP : public PairLJExpand, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
index 2e97fa1b5ead080fed6806ea9c18af4408505368..ca8875c7f8c810256e8ac4f86b6cef65f2b6a0a1 100644
--- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) :
-  PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR)
+  PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,40 +43,36 @@ void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsCoulGromacsOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
@@ -87,13 +82,14 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid
 
   evdwl = ecoul = 0.0;
 
-  double **x = atom->x;
-  double *q = atom->q;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_coul = force->special_coul;
-  double *special_lj = force->special_lj;
-  double qqrd2e = force->qqrd2e;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -190,7 +186,7 @@ void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid
 	}
 	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,ecoul,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
index d789bd6797f99809d62cdd5469f9e323bdc63328..ee506c2c4a6f2ce55eb26880be6184ed4ee00c4f 100644
--- a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
@@ -39,7 +39,7 @@ class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrO
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp
index f1c7d2faf9450a83c9be561322796a92b31d8680..abdc4c5ccffeee0f40ee78f35bdb936a7da8c967 100644
--- a/src/USER-OMP/pair_lj_gromacs_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) :
-  PairLJGromacs(lmp), ThrOMP(lmp, PAIR)
+  PairLJGromacs(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJGromacsOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -151,8 +147,8 @@ void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h
index d192a414ef56cf6b4f27d4c969cd8eb6c06cd702..8e0f4bd2810507a4f81935240be669782a19b061 100644
--- a/src/USER-OMP/pair_lj_gromacs_omp.h
+++ b/src/USER-OMP/pair_lj_gromacs_omp.h
@@ -39,7 +39,7 @@ class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp
index 55ee908e474e5ac9312769af1d390509b4449361..47cc23bf91a28728e9843d2bbcb8cdf5893a1f39 100644
--- a/src/USER-OMP/pair_lj_sf_omp.cpp
+++ b/src/USER-OMP/pair_lj_sf_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) :
-  PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR)
+  PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJShiftedForceOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -142,8 +138,8 @@ void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h
index 6fba43fb8f8837551f7bb01d2050617fd20e1d73..c73c8f746b855c26d4d7af8a0c478f305948a120 100644
--- a/src/USER-OMP/pair_lj_sf_omp.h
+++ b/src/USER-OMP/pair_lj_sf_omp.h
@@ -39,7 +39,7 @@ class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp
index 1ad88044a622c9c7a5749aaa2de7edd57581cf25..4bf9ceb41c9900bdfdf91b0cc7952263b150c3bc 100644
--- a/src/USER-OMP/pair_lj_smooth_omp.cpp
+++ b/src/USER-OMP/pair_lj_smooth_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) :
-  PairLJSmooth(lmp), ThrOMP(lmp, PAIR)
+  PairLJSmooth(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairLJSmoothOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJSmoothOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -85,10 +80,11 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -155,8 +151,8 @@ void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h
index de27a4008d7063a75e6e958a6805dc838ecaf13f..eb6eb92decaf544bf72338a1d9f556bb8c6eb453 100644
--- a/src/USER-OMP/pair_lj_smooth_omp.h
+++ b/src/USER-OMP/pair_lj_smooth_omp.h
@@ -39,7 +39,7 @@ class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp
index a53e35a9775e55168a8737c19c1ceb18b6cf9621..f61fd4e3835f4285ee8ca34dff6fdd7a7a23815d 100644
--- a/src/USER-OMP/pair_morse_omp.cpp
+++ b/src/USER-OMP/pair_morse_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairMorseOMP::PairMorseOMP(LAMMPS *lmp) :
-  PairMorse(lmp), ThrOMP(lmp, PAIR)
+  PairMorse(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairMorseOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairMorseOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairMorseOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -139,8 +135,8 @@ void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h
index a966e6f11fd8f60463e345ee88895d1070c5be76..a20aad6716cfaddce3a16a68253e83fa1daff6be 100644
--- a/src/USER-OMP/pair_morse_omp.h
+++ b/src/USER-OMP/pair_morse_omp.h
@@ -39,7 +39,7 @@ class PairMorseOMP : public PairMorse, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp
index 7cb1e83086b3ea74f6029b58887db6b96e73b077..e052271e4f9af918845b365088193d89b988e521 100644
--- a/src/USER-OMP/pair_peri_lps_omp.cpp
+++ b/src/USER-OMP/pair_peri_lps_omp.cpp
@@ -26,15 +26,18 @@
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
+#include "math_const.h"
 
 using namespace LAMMPS_NS;
+using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) :
-  PairPeriLPS(lmp), ThrOMP(lmp, PAIR)
+  PairPeriLPS(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
+  fix_name = "PERI_NEIGH_OMP";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -43,7 +46,6 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -61,38 +63,34 @@ void PairPeriLPSOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairPeriLPSOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
@@ -103,9 +101,10 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
 
   double *vfrac = atom->vfrac;
@@ -151,7 +150,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
- 
+
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
@@ -182,7 +181,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 	// of the bond-based theory used in PMB model
 
         double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
-	  (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
+	  (MY_PI * cutsq[itype][jtype] * cutsq[itype][jtype]);
         rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
 
         if (r > 0.0) fpair = -(rk/r);
@@ -199,7 +198,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 
         if (EFLAG) evdwl = 0.5*rk*dr;
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				 fpair*vfrac[i],delx,dely,delz,tid);
+				 fpair*vfrac[i],delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -214,7 +213,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 #if defined(_OPENMP)
   // each thread works on a fixed chunk of atoms.
   const int idelta = 1 + nlocal/comm->nthreads;
-  iifrom = tid*idelta;
+  iifrom = thr->get_tid()*idelta;
   iito   = iifrom + idelta;
   if (iito > nlocal)
     iito = nlocal;
@@ -234,7 +233,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 #endif
   { // communicate dilatation (theta) of each particle	
     comm->forward_comm_pair(this);
-    // communicate wighted volume (wvolume) upon every reneighbor
+    // communicate weighted volume (wvolume) upon every reneighbor
     if (neighbor->ago == 0)
       comm->forward_comm_fix(modify->fix[ifix_peri]);
   }
@@ -245,10 +244,8 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
   if (EFLAG) {
     for (i = iifrom; i < iito; i++) {   
       itype = type[i];
-      if (eflag_global)
-	eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
-      if (eflag_atom)
-	eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
+      e_tally_thr(this, i, i, nlocal, NEWTON_PAIR,
+		  0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]), 0.0, thr);
     }
   }
 
@@ -332,7 +329,7 @@ void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
 		   omega_plus*(deviatoric_extension * deviatoric_extension) *
 		   vfrac[j] * vfrac_scale;
       if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
-			       0.5*fbond*vfrac[i],delx,dely,delz,tid);
+			       0.5*fbond*vfrac[i],delx,dely,delz,thr);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h
index 2068830ca09666932fbd1307460aab0a90c05701..f234a4109814c9fde416069ebb369622e47be35c 100644
--- a/src/USER-OMP/pair_peri_lps_omp.h
+++ b/src/USER-OMP/pair_peri_lps_omp.h
@@ -43,7 +43,7 @@ class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp
index 4e46d142d9be6375ad7b97c4c1ca009408be11de..96e991bab6eb22ef59f769e7da62b501c63bee90 100644
--- a/src/USER-OMP/pair_peri_pmb_omp.cpp
+++ b/src/USER-OMP/pair_peri_pmb_omp.cpp
@@ -32,9 +32,10 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) :
-  PairPeriPMB(lmp), ThrOMP(lmp, PAIR)
+ PairPeriPMB(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
+  fix_name = "PERI_NEIGH_OMP";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -43,7 +44,6 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -59,38 +59,34 @@ void PairPeriPMBOMP::compute(int eflag, int vflag)
   }
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairPeriPMBOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
@@ -101,9 +97,10 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
   double fxtmp,fytmp,fztmp;
 
   double *vfrac = atom->vfrac;
@@ -148,10 +145,11 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
- 
+
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
+
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
@@ -190,7 +188,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 
         if (EFLAG) evdwl = 0.5*rk*dr;
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
-				 fpair*vfrac[i],delx,dely,delz,tid);
+				 fpair*vfrac[i],delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
@@ -205,7 +203,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
 #if defined(_OPENMP)
   // each thread works on a fixed chunk of atoms.
   const int idelta = 1 + nlocal/comm->nthreads;
-  iifrom = tid*idelta;
+  iifrom = thr->get_tid()*idelta;
   iito   = iifrom + idelta;
   if (iito > nlocal)
     iito = nlocal;
@@ -278,7 +276,7 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
       if (EFLAG) evdwl = 0.5*rk*dr;
       if (EVFLAG) 
 	ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
-		     0.5*fbond*vfrac[i],delx,dely,delz,tid);
+		     0.5*fbond*vfrac[i],delx,dely,delz,thr);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
@@ -291,13 +289,14 @@ void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
+
       first = false;
     }
   }
 
   sync_threads();
 
-  // store new s0
+  // store new s0 (in parallel)
   for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; 
 }
 
diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h
index 9940e5ed15dafe63befa77636e6ddeab0be4a03f..8a7fc091d9aa7df290ee1246f0928ae9cf0803de 100644
--- a/src/USER-OMP/pair_peri_pmb_omp.h
+++ b/src/USER-OMP/pair_peri_pmb_omp.h
@@ -39,7 +39,7 @@ class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp
index 4870553050b4b7f4186932b8ddd7a881f5ba69d6..cef5aaefc554a2f16d15109d1a6c96e531fd8589 100644
--- a/src/USER-OMP/pair_resquared_omp.cpp
+++ b/src/USER-OMP/pair_resquared_omp.cpp
@@ -27,7 +27,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) :
-  PairRESquared(lmp), ThrOMP(lmp, PAIR)
+  PairRESquared(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -38,7 +38,6 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -46,40 +45,34 @@ void PairRESquaredOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f, **torque;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
-    torque = atom->torque + tid*nall;
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
-	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
-	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
-      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces and torques into global arrays.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
-    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+void PairRESquaredOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
@@ -87,11 +80,12 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
   int *ilist,*jlist,*numneigh,**firstneigh;
   RE2Vars wi,wj;
 
-  double **x = atom->x;
-  int *ellipsoid = atom->ellipsoid;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const tor = thr->get_torque();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
 
   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
 
@@ -105,6 +99,7 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
     i = ilist[ii];
     itype = type[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
 
     // not a LJ sphere
 
@@ -129,6 +124,8 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
+	fforce[0] = fforce[1] = fforce[2] = 0.0;
+
         switch (form[itype][jtype]) {
 
          case SPHERE_SPHERE:
@@ -157,17 +154,17 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
          case ELLIPSE_SPHERE:
           one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
+          t1tmp += ttor[0]*factor_lj;
+          t2tmp += ttor[1]*factor_lj;
+          t3tmp += ttor[2]*factor_lj;
           break;
 
          default:
           precompute_i(j,wj);
           one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
-          tor[i][0] += ttor[0]*factor_lj;
-          tor[i][1] += ttor[1]*factor_lj;
-          tor[i][2] += ttor[2]*factor_lj;
+          t1tmp += ttor[0]*factor_lj;
+          t2tmp += ttor[1]*factor_lj;
+          t3tmp += ttor[2]*factor_lj;
           if (NEWTON_PAIR || j < nlocal) {
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
@@ -179,9 +176,9 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
-        f[i][0] += fforce[0];
-        f[i][1] += fforce[1];
-        f[i][2] += fforce[2];
+	fxtmp += fforce[0];
+	fytmp += fforce[1];
+	fztmp += fforce[2];
 
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= fforce[0];
@@ -193,9 +190,15 @@ void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int
 
 	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
 				     evdwl,0.0,fforce[0],fforce[1],fforce[2],
-				     -r12[0],-r12[1],-r12[2],tid);
+				     -r12[0],-r12[1],-r12[2],thr);
       }
     }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    tor[i][0] += t1tmp;
+    tor[i][1] += t2tmp;
+    tor[i][2] += t3tmp;
   }
 }
 
diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h
index 2a50bb6dd0bc0a8c4e4305304a7ed72f5eb722cb..53a6e2e28f059e7a76449c325bdce7601a5a574c 100644
--- a/src/USER-OMP/pair_resquared_omp.h
+++ b/src/USER-OMP/pair_resquared_omp.h
@@ -39,7 +39,7 @@ class PairRESquaredOMP : public PairRESquared, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp
index 9f9673a28b5f7a38e90e3ce7127d5c5bd020a62f..cbc1c9f7fd6daf7d4571cda4d7d5e613754b6727 100644
--- a/src/USER-OMP/pair_soft_omp.cpp
+++ b/src/USER-OMP/pair_soft_omp.cpp
@@ -29,7 +29,7 @@ using namespace MathConst;
 /* ---------------------------------------------------------------------- */
 
 PairSoftOMP::PairSoftOMP(LAMMPS *lmp) :
-  PairSoft(lmp), ThrOMP(lmp, PAIR)
+  PairSoft(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -40,7 +40,6 @@ void PairSoftOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -48,38 +47,34 @@ void PairSoftOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSoftOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -88,10 +83,11 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 	if (EFLAG)
 	  evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r));
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h
index 840d874601a88296d53b087156a1861cb6b334bc..1698089521794aee7dccc30e32d7ee85b77d38d4 100644
--- a/src/USER-OMP/pair_soft_omp.h
+++ b/src/USER-OMP/pair_soft_omp.h
@@ -39,7 +39,7 @@ class PairSoftOMP : public PairSoft, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp
index 5d7f1a60d75fae86908ff12ea2ce2b8c2a0d6070..12aceed1d45d1c28b197e4688adb6edf9d68c9eb 100644
--- a/src/USER-OMP/pair_sw_omp.cpp
+++ b/src/USER-OMP/pair_sw_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairSWOMP::PairSWOMP(LAMMPS *lmp) :
-  PairSW(lmp), ThrOMP(lmp, PAIR)
+  PairSW(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairSWOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,33 +43,29 @@ void PairSWOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	eval<1,1>(f, ifrom, ito, tid);
+	eval<1,1>(ifrom, ito, thr);
       } else {
-	eval<1,0>(f, ifrom, ito, tid);
+	eval<1,0>(ifrom, ito, thr);
       }
-    } else eval<0,0>(f, ifrom, ito, tid);
+    } else eval<0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG>
-void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSWOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
@@ -81,10 +76,11 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *tag = atom->tag;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -92,7 +88,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 
   double fxtmp,fytmp,fztmp;
 
-  // loop over neighbors of my atoms
+  // loop over full neighbor list of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
@@ -144,7 +140,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
       f[j][2] -= delz*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
 
     jnumm1 = jnum - 1;
@@ -189,7 +185,7 @@ void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
 	f[k][1] += fk[1];
 	f[k][2] += fk[2];
 
-	if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid);
+	if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,thr);
       }
       f[j][0] += fjxtmp;
       f[j][1] += fjytmp;
diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h
index 40052d7d4197401677ead18459089f022d39c364..c4af86007a3bd76648bc698d22be535fa4dc8d28 100644
--- a/src/USER-OMP/pair_sw_omp.h
+++ b/src/USER-OMP/pair_sw_omp.h
@@ -39,7 +39,7 @@ class PairSWOMP : public PairSW, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp
index 6b14d4c9813e61dae6452489017f0b1be62440cd..e8d63e590d35f98cde3171cb5246f4f13a07216e 100644
--- a/src/USER-OMP/pair_table_omp.cpp
+++ b/src/USER-OMP/pair_table_omp.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairTableOMP::PairTableOMP(LAMMPS *lmp) :
-  PairTable(lmp), ThrOMP(lmp, PAIR)
+  PairTable(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -37,7 +37,6 @@ void PairTableOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -45,38 +44,34 @@ void PairTableOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTableOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -89,10 +84,11 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -122,7 +118,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
-      
+
       if (rsq < cutsq[itype][jtype]) {
 	tb = &tables[tabindex[itype][jtype]];
 	if (rsq < tb->innersq)
@@ -181,7 +177,7 @@ void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 	}
 
 	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
 
diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h
index 6fd1ce74a4effea9a7b9ce4dc54278a39c96b5a2..974149b9ac35cc92776126a6d4a9a2be9d1274d3 100644
--- a/src/USER-OMP/pair_table_omp.h
+++ b/src/USER-OMP/pair_table_omp.h
@@ -39,7 +39,7 @@ class PairTableOMP : public PairTable, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp
index f59a8488f7a36117c876093be7dacce54ab0a7e4..fdbcd48292d208b9b2585189741052fe3c45dce6 100644
--- a/src/USER-OMP/pair_tersoff_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) :
-  PairTersoff(lmp), ThrOMP(lmp, PAIR)
+  PairTersoff(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairTersoffOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = vflag_atom = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,35 +43,31 @@ void PairTersoffOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
-    } else eval<0,0,0>(f, ifrom, ito, tid);
+    } else eval<0,0,0>(ifrom, ito, thr);
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTersoffOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,k,ii,jj,kk,jnum;
   int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
@@ -84,10 +79,11 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *tag = atom->tag;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
@@ -147,7 +143,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
       f[j][2] -= delz*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
-			       evdwl,0.0,fpair,delx,dely,delz,tid);
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
     }
 
     // three-body interactions
@@ -199,7 +195,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
       fjztmp -= delr1[2]*fpair;
 
       if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
-			       -fpair,-delr1[0],-delr1[1],-delr1[2],tid);
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],thr);
 
       // attractive term via loop over k
 
@@ -229,7 +225,7 @@ void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
 	f[k][1] += fk[1];
 	f[k][2] += fk[2];
 
-	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid);
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
       }
       f[j][0] += fjxtmp;
       f[j][1] += fjytmp;
diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h
index 5e5dc066d2f50c15a3bfc522f325a0010a38dbe2..97c20548aff2356f00d111487a26a90d5f8b48eb 100644
--- a/src/USER-OMP/pair_tersoff_omp.h
+++ b/src/USER-OMP/pair_tersoff_omp.h
@@ -34,7 +34,7 @@ class PairTersoffOMP : public PairTersoff, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
index 710ad9df187be033641661d2005e0cce50d2725a..6caa13ee90ec4880a4cde0c1aedbc204413a4bdf 100644
--- a/src/USER-OMP/pair_yukawa_colloid_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) :
-  PairYukawaColloid(lmp), ThrOMP(lmp, PAIR)
+  PairYukawaColloid(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairYukawaColloidOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaColloidOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
@@ -84,11 +79,12 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -144,7 +140,7 @@ void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor;
 	}
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h
index 9483cd15c1becdbe5a25c96ba13ad50bd0cb4bfb..c424e9eff354c7974c8f32230e6000525c50eb3e 100644
--- a/src/USER-OMP/pair_yukawa_colloid_omp.h
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.h
@@ -39,7 +39,7 @@ class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp
index 1380e2239c1357a5484eaf2b3302f1ac3e99f246..210c7fcc1eaf2338db67aa040c0ceec5a644823b 100644
--- a/src/USER-OMP/pair_yukawa_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) :
-  PairYukawa(lmp), ThrOMP(lmp, PAIR)
+  PairYukawa(lmp), ThrOMP(lmp, THR_PAIR)
 {
   respa_enable = 0;
 }
@@ -36,7 +36,6 @@ void PairYukawaOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
-    ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
@@ -44,38 +43,34 @@ void PairYukawaOMP::compute(int eflag, int vflag)
   const int inum = list->inum;
 
 #if defined(_OPENMP)
-#pragma omp parallel default(shared)
+#pragma omp parallel default(none) shared(eflag,vflag)
 #endif
   {
     int ifrom, ito, tid;
-    double **f;
 
-    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (evflag) {
       if (eflag) {
-	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
-	else eval<1,1,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
       } else {
-	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
-	else eval<1,0,0>(f, ifrom, ito, tid);
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
       }
     } else {
-      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
-      else eval<0,0,0>(f, ifrom, ito, tid);
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    // reduce per thread forces into global force array.
-    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
-
-  // reduce per thread energy and virial, if requested.
-  if (evflag) ev_reduce_thr(this);
-  if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaOMP::eval(int iifrom, int iito, ThrData * const thr)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
@@ -84,10 +79,11 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
 
   evdwl = 0.0;
 
-  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
@@ -141,8 +137,8 @@ void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
 	  evdwl *= factor;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
       }
     }
     f[i][0] += fxtmp;
diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h
index e363ac6d1742d53d4747d364664be3b1f309598f..99abc569fa207274d9d8ea8ce2261d8f2d8b98b4 100644
--- a/src/USER-OMP/pair_yukawa_omp.h
+++ b/src/USER-OMP/pair_yukawa_omp.h
@@ -39,7 +39,7 @@ class PairYukawaOMP : public PairYukawa, public ThrOMP {
 
  private:
   template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-  void eval(double **f, int ifrom, int ito, int tid);
+  void eval(int ifrom, int ito, ThrData * const thr);
 };
 
 }
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index 37ce1f198b011f6392574ee9460dc90f7e75e7d4..19537868e5d491f52f445c00a44ba4ad8f3ae30a 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -16,213 +16,481 @@
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
-#include "thr_omp.h"
-
-#include "memory.h"
-
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neighbor.h"
+
+#include "thr_omp.h"
 
 #include "pair.h"
+#include "bond.h"
+#include "angle.h"
 #include "dihedral.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
+#include "improper.h"
+#include "kspace.h"
 
 #include "math_const.h"
 
+#include <string.h>
+
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
-ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr)
+ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(NULL), thr_style(style)
 {
-  // initialize fixed size per thread storage
-  eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL;
-  virial_thr = NULL;
-
-  lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr");
-  lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr");
-  lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr");
-  lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr");
-
-  // variable size per thread, per atom storage
-  // the actually allocation happens via memory->grow() in ev_steup_thr()
-  maxeatom_thr = maxvatom_thr = 0;
-  evflag_global = evflag_atom = 0;
-  eatom_thr = NULL;
-  vatom_thr = NULL;
+  // register fix omp with this class
+  int ifix = lmp->modify->find_fix("package_omp");
+  if (ifix < 0)
+    lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles");
+  fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::~ThrOMP()
 {
-  lmp->memory->destroy(eng_vdwl_thr);
-  lmp->memory->destroy(eng_coul_thr);
-  lmp->memory->destroy(eng_bond_thr);
-  lmp->memory->destroy(virial_thr);
-  lmp->memory->destroy(eatom_thr);
-  lmp->memory->destroy(vatom_thr);
+  // nothing to do?
 }
 
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   Hook up the per-thread, per-atom arrays into the tally infrastructure
+   ---------------------------------------------------------------------- */
 
-void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global,
-			     int eflag_atom, int vflag_atom, int nthreads)
+void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom,
+			  double **vatom, ThrData *thr)
 {
-  int t,i;
-
-  evflag_global = (eflag_global || vflag_global);
-  evflag_atom = (eflag_atom || vflag_atom);
+  const int tid = thr->get_tid();
   
-  for (t = 0; t < nthreads; ++t) {
+  if (thr_style & THR_PAIR) {
+    if (eflag & 2) {
+      thr->eatom_pair = eatom + tid*nall;
+      memset(&(thr->eatom_pair[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_pair = vatom + tid*nall;
+      memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double));
+    }
+  }
+
+  if (thr_style & THR_BOND) {
+    if (eflag & 2) {
+      thr->eatom_bond = eatom + tid*nall;
+      memset(&(thr->eatom_bond[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_bond = vatom + tid*nall;
+      memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (eflag_global) 
-      eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0;
+  if (thr_style & THR_ANGLE) {
+    if (eflag & 2) {
+      thr->eatom_angle = eatom + tid*nall;
+      memset(&(thr->eatom_angle[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_angle = vatom + tid*nall;
+      memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (vflag_global) 
-      for (i = 0; i < 6; ++i)
-	virial_thr[t][i] = 0.0;
+  if (thr_style & THR_DIHEDRAL) {
+    if (eflag & 2) {
+      thr->eatom_dihed = eatom + tid*nall;
+      memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_dihed = vatom + tid*nall;
+      memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double));
+    }
+  }
 
-    if (eflag_atom)
-      for (i = 0; i < ntotal; ++i)
-	eatom_thr[t][i] = 0.0;
-    
-    if (vflag_atom)
-      for (i = 0; i < ntotal; ++i) {
-        vatom_thr[t][i][0] = 0.0;
-        vatom_thr[t][i][1] = 0.0;
-        vatom_thr[t][i][2] = 0.0;
-        vatom_thr[t][i][3] = 0.0;
-        vatom_thr[t][i][4] = 0.0;
-        vatom_thr[t][i][5] = 0.0;
-      }
+  if (thr_style & THR_IMPROPER) {
+    if (eflag & 2) {
+      thr->eatom_imprp = eatom + tid*nall;
+      memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_imprp = vatom + tid*nall;
+      memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double));
+    }
   }
-}
 
-/* ---------------------------------------------------------------------- */
+#if 0 /* not supported (yet) */
+  if (thr_style & THR_KSPACE) {
+    if (eflag & 2) {
+      thr->eatom_kspce = eatom + tid*nall;
+      memset(&(thr->eatom_kspce[0]),0,nall*sizeof(double));
+    }
+    if (vflag & 4) {
+      thr->vatom_kspce = vatom + tid*nall;
+      memset(&(thr->vatom_kspce[0][0]),0,nall*6*sizeof(double));
+    }
+  }
+#endif
+}
 
-void ThrOMP::ev_setup_thr(Dihedral *dihed)
+/* ----------------------------------------------------------------------
+   Reduce per-thread data into the regular structures.
+   Reduction of global properties is serialized with a "critical"
+   directive, so that only one thread at a time accesses the global
+   variables. Since the threads are not synchronized at that point,
+   this should come with little overhead. The reduction of per-atom
+   properties, in contrast, is parallelized over threads like forces.
+   ---------------------------------------------------------------------- */
+
+void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
+			ThrData *const thr, const int nproxy)
 {
-  int nthreads = lmp->comm->nthreads;
+  const int nlocal = lmp->atom->nlocal;
+  const int nghost = lmp->atom->nghost;
+  const int nall = nlocal + nghost;
+  const int nfirst = lmp->atom->nfirst;
+  const int nthreads = lmp->comm->nthreads;
+  const int evflag = eflag | vflag;
+  
+  const int tid = thr->get_tid();
+  double **f = lmp->atom->f;
+  double **x = lmp->atom->x;
+
+  switch (thr_style) {
 
-  // reallocate per-atom arrays if necessary
-  if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
-    maxeatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
+  case THR_PAIR: {
+    Pair * const pair = lmp->force->pair;
+  
+    if (pair->vflag_fdotr) {
+      if (lmp->neighbor->includegroup == 0)
+	thr->virial_fdotr_compute(x, nlocal, nghost, -1);
+      else
+	thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
+    }
+
+    if (evflag) {
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (eflag & 1) {
+	  pair->eng_vdwl += thr->eng_vdwl;
+	  pair->eng_coul += thr->eng_coul;
+	  thr->eng_vdwl = 0.0;
+	  thr->eng_coul = 0.0;
+	}
+	if (vflag & 3)
+	  for (int i=0; i < 6; ++i) {
+	    pair->virial[i] += thr->virial_pair[i];
+	    thr->virial_pair[i] = 0.0;
+	  }
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
   }
-  if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
-    maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
+    break;
+
+  case THR_PAIR|THR_PROXY: {
+    Pair * const pair = lmp->force->pair;
+    
+    if (tid >= nproxy && pair->vflag_fdotr) {
+      if (lmp->neighbor->includegroup == 0)
+	thr->virial_fdotr_compute(x, nlocal, nghost, -1);
+      else
+	thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
+    }
+    
+    if (evflag) {
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (tid < nproxy) {
+	  // nothing to do for kspace?
+	  if (vflag & 3)
+	    for (int i=0; i < 6; ++i) {
+	      thr->virial_pair[i] = 0.0;
+	    }
+	} else {
+	  if (eflag & 1) {
+	    pair->eng_vdwl += thr->eng_vdwl;
+	    pair->eng_coul += thr->eng_coul;
+	    thr->eng_vdwl = 0.0;
+	    thr->eng_coul = 0.0;
+	  }
+	  if (vflag & 3)
+	    for (int i=0; i < 6; ++i) {
+	      pair->virial[i] += thr->virial_pair[i];
+	      thr->virial_pair[i] = 0.0;
+	    }
+	}
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
   }
+    break;
 
-  int ntotal = (lmp->force->newton_bond) ? 
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+  case THR_BOND:
 
-  // set up per thread accumulators
-  ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
-		   dihed->eflag_atom, dihed->vflag_atom, nthreads);
-}
+    if (evflag) {
+      Bond * const bond = lmp->force->bond;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	bond->energy += thr->eng_bond;
+	for (int i=0; i < 6; ++i)
+	  bond->virial[i] += thr->virial_bond[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-/* ---------------------------------------------------------------------- */
+  case THR_ANGLE:
 
-void ThrOMP::ev_setup_thr(Pair *pair)
-{
-  int nthreads = lmp->comm->nthreads;
+    if (evflag) {
+      Angle * const angle = lmp->force->angle;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	angle->energy += thr->eng_angle;
+	for (int i=0; i < 6; ++i)
+	  angle->virial[i] += thr->virial_angle[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-  // reallocate per-atom arrays if necessary
-  if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
-    maxeatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
-  }
-  if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
-    maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
-  }
+  case THR_DIHEDRAL:
+    
+    if (evflag) {
+      Dihedral * const dihedral = lmp->force->dihedral;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	dihedral->energy += thr->eng_dihed;
+	for (int i=0; i < 6; ++i)
+	  dihedral->virial[i] += thr->virial_dihed[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
 
-  int ntotal = (lmp->force->newton) ?
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+  case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals
+
+    if (evflag) {
+      Dihedral * const dihedral = lmp->force->dihedral;
+      Pair * const pair = lmp->force->pair;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	if (eflag & 1) {
+	  dihedral->energy += thr->eng_dihed;
+	  pair->eng_vdwl += thr->eng_vdwl;
+	  pair->eng_coul += thr->eng_coul;
+	}
+
+	if (vflag & 3) {
+	  for (int i=0; i < 6; ++i) {
+	    dihedral->virial[i] += thr->virial_dihed[i];
+	    pair->virial[i] += thr->virial_pair[i];
+	  }
+	}
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
+	data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
+	data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
+
+  case THR_IMPROPER:
+
+    if (evflag) {
+      Improper *improper = lmp->force->improper;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	improper->energy += thr->eng_imprp;
+	for (int i=0; i < 6; ++i)
+	  improper->virial[i] += thr->virial_imprp[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+    break;
+
+  case THR_KSPACE|THR_PROXY: // fallthrough
+  case THR_KSPACE:
+    // nothing to do (for now)
+#if 0
+    if (evflag) {
+      KSpace *kspace = lmp->force->kspace;
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+	kspace->energy += thr->eng_kspce;
+	for (int i=0; i < 6; ++i)
+	  kspace->virial[i] += thr->virial_kspce[i];
+      }
+      if (eflag & 2) {
+	sync_threads();
+	data_reduce_thr(&(kspace->eatom[0]), nall, nthreads, 1, tid);
+      }
+      if (vflag & 4) {
+	sync_threads();
+	data_reduce_thr(&(kspace->vatom[0][0]), nall, nthreads, 6, tid);
+      }
+    }
+#endif
+    break;
 
-  // set up per thread accumulators
-  ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
-		   pair->eflag_atom, pair->vflag_atom, nthreads);
+  default:
+    printf("tid:%d unhandled thr_style case %d\n", tid, thr_style);
+    break;
+  }
+    
+    if (style == fix->last_omp_style) {
+    sync_threads();
+    data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
+    if (lmp->atom->torque)
+      data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
+  }
 }
 
 /* ----------------------------------------------------------------------
-   reduce the per thread accumulated E/V data into the canonical accumulators.
+   tally eng_vdwl and eng_coul into per-thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
-void ThrOMP::ev_reduce_thr(Dihedral *dihed)
+
+void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, 
+			 const int nlocal, const int newton_pair,
+			 const double evdwl, const double ecoul, ThrData * const thr)
 {
-  int nthreads = lmp->comm->nthreads;
-  int ntotal = (lmp->force->newton_bond) ?
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
-
-  for (int n = 0; n < nthreads; ++n) {
-    dihed->energy += eng_bond_thr[n];
-    if (dihed->vflag_either) {
-      dihed->virial[0] += virial_thr[n][0];
-      dihed->virial[1] += virial_thr[n][1];
-      dihed->virial[2] += virial_thr[n][2];
-      dihed->virial[3] += virial_thr[n][3];
-      dihed->virial[4] += virial_thr[n][4];
-      dihed->virial[5] += virial_thr[n][5];
-      if (dihed->vflag_atom) {
-        for (int i = 0; i < ntotal; ++i) {
-          dihed->vatom[i][0] += vatom_thr[n][i][0];
-          dihed->vatom[i][1] += vatom_thr[n][i][1];
-          dihed->vatom[i][2] += vatom_thr[n][i][2];
-          dihed->vatom[i][3] += vatom_thr[n][i][3];
-          dihed->vatom[i][4] += vatom_thr[n][i][4];
-          dihed->vatom[i][5] += vatom_thr[n][i][5];
-        }
+  if (pair->eflag_global) {
+    if (newton_pair) {
+      thr->eng_vdwl += evdwl;
+      thr->eng_coul += ecoul;
+    } else {
+      const double evdwlhalf = 0.5*evdwl;
+      const double ecoulhalf = 0.5*ecoul;
+      if (i < nlocal) {
+	thr->eng_vdwl += evdwlhalf;
+	thr->eng_coul += ecoulhalf;
       }
-    }
-    if (dihed->eflag_atom) {
-      for (int i = 0; i < ntotal; ++i) {
-        dihed->eatom[i] += eatom_thr[n][i];
+      if (j < nlocal) {
+	thr->eng_vdwl += evdwlhalf;
+	thr->eng_coul += ecoulhalf;
       }
     }
   }
+  if (pair->eflag_atom) {
+    const double epairhalf = 0.5 * (evdwl + ecoul);
+    if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf;
+    if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf;
+  }
+}
+
+/* helper functions: add a 6-component virial (optionally scaled) to an accumulator */
+static void v_tally(double * const vout, const double * const vin) 
+{
+  vout[0] += vin[0];
+  vout[1] += vin[1];
+  vout[2] += vin[2];
+  vout[3] += vin[3];
+  vout[4] += vin[4];
+  vout[5] += vin[5];
+}
+
+static void v_tally(double * const vout, const double scale, const double * const vin) 
+{
+  vout[0] += scale*vin[0];
+  vout[1] += scale*vin[1];
+  vout[2] += scale*vin[2];
+  vout[3] += scale*vin[3];
+  vout[4] += scale*vin[4];
+  vout[5] += scale*vin[5];
 }
 
 /* ----------------------------------------------------------------------
-   reduce the per thread accumulated E/V data into the canonical accumulators.
+   tally virial into per-thread global and per-atom accumulators
 ------------------------------------------------------------------------- */
-void ThrOMP::ev_reduce_thr(Pair *pair)
+void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, 
+			 const int nlocal, const int newton_pair,
+			 const double * const v, ThrData * const thr)
 {
-  const int nthreads = lmp->comm->nthreads;
-  const int ntotal = (lmp->force->newton) ? 
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
-
-  for (int n = 0; n < nthreads; ++n) {
-    pair->eng_vdwl += eng_vdwl_thr[n];
-    pair->eng_coul += eng_coul_thr[n];
-    if (pair->vflag_either) {
-      pair->virial[0] += virial_thr[n][0];
-      pair->virial[1] += virial_thr[n][1];
-      pair->virial[2] += virial_thr[n][2];
-      pair->virial[3] += virial_thr[n][3];
-      pair->virial[4] += virial_thr[n][4];
-      pair->virial[5] += virial_thr[n][5];
-      if (pair->vflag_atom) {
-        for (int i = 0; i < ntotal; ++i) {
-          pair->vatom[i][0] += vatom_thr[n][i][0];
-          pair->vatom[i][1] += vatom_thr[n][i][1];
-          pair->vatom[i][2] += vatom_thr[n][i][2];
-          pair->vatom[i][3] += vatom_thr[n][i][3];
-          pair->vatom[i][4] += vatom_thr[n][i][4];
-          pair->vatom[i][5] += vatom_thr[n][i][5];
-        }
-      }
+  if (pair->vflag_global) {
+    double * const va = thr->virial_pair;
+    if (newton_pair) {
+      v_tally(va,v);
+    } else {
+      if (i < nlocal) v_tally(va,0.5,v);
+      if (j < nlocal) v_tally(va,0.5,v);
     }
-    if (pair->eflag_atom) {
-      for (int i = 0; i < ntotal; ++i) {
-        pair->eatom[i] += eatom_thr[n][i];
-      }
+  }
+
+  if (pair->vflag_atom) {
+    if (newton_pair || i < nlocal) {
+      double * const va = thr->vatom_pair[i];
+      v_tally(va,0.5,v);
+    }
+    if (newton_pair || j < nlocal) {
+      double * const va = thr->vatom_pair[j];
+      v_tally(va,0.5,v);
     }
   }
 }
@@ -232,39 +500,17 @@ void ThrOMP::ev_reduce_thr(Pair *pair)
    need i < nlocal test since called by bond_quartic and dihedral_charmm
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
-			  int newton_pair, double evdwl, double ecoul,
-			  double fpair, double delx, double dely,
-			  double delz, int tid)
+void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal,
+			  const int newton_pair, const double evdwl, const double ecoul,
+			  const double fpair, const double delx, const double dely,
+			  const double delz, ThrData * const thr)
 {
-  double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
-  if (pair->eflag_either) {
-    if (pair->eflag_global) {
-      if (newton_pair) {
-	eng_vdwl_thr[tid] += evdwl;
-	eng_coul_thr[tid] += ecoul;
-      } else {
-	evdwlhalf = 0.5*evdwl;
-	ecoulhalf = 0.5*ecoul;
-	if (i < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-	if (j < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-      }
-    }
-    if (pair->eflag_atom) {
-      epairhalf = 0.5 * (evdwl + ecoul);
-      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
-      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
-    }
-  }
+  if (pair->eflag_either)
+    e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
 
   if (pair->vflag_either) {
+    double v[6];
     v[0] = delx*delx*fpair;
     v[1] = dely*dely*fpair;
     v[2] = delz*delz*fpair;
@@ -272,52 +518,7 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
     v[4] = delx*delz*fpair;
     v[5] = dely*delz*fpair;
 
-    if (pair->vflag_global) {
-      if (newton_pair) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
-      } else {
-	if (i < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-	if (j < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-      }
-    }
-
-    if (pair->vflag_atom) {
-      if (newton_pair || i < nlocal) {
-	vatom_thr[tid][i][0] += 0.5*v[0];
-	vatom_thr[tid][i][1] += 0.5*v[1];
-	vatom_thr[tid][i][2] += 0.5*v[2];
-	vatom_thr[tid][i][3] += 0.5*v[3];
-	vatom_thr[tid][i][4] += 0.5*v[4];
-	vatom_thr[tid][i][5] += 0.5*v[5];
-      }
-      if (newton_pair || j < nlocal) {
-	vatom_thr[tid][j][0] += 0.5*v[0];
-	vatom_thr[tid][j][1] += 0.5*v[1];
-	vatom_thr[tid][j][2] += 0.5*v[2];
-	vatom_thr[tid][j][3] += 0.5*v[3];
-	vatom_thr[tid][j][4] += 0.5*v[4];
-	vatom_thr[tid][j][5] += 0.5*v[5];
-      }
-    }
+    v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
   }
 }
 
@@ -326,39 +527,19 @@ void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
    for virial, have delx,dely,delz and fx,fy,fz
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
-			      int newton_pair, double evdwl, double ecoul,
-			      double fx, double fy, double fz,
-			      double delx, double dely, double delz, int tid)
+void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j,
+			      const int nlocal, const int newton_pair, 
+			      const double evdwl, const double ecoul,
+			      const double fx, const double fy, const double fz,
+			      const double delx, const double dely, const double delz,
+			      ThrData * const thr)
 {
-  double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
-  if (pair->eflag_either) {
-    if (pair->eflag_global) {
-      if (newton_pair) {
-	eng_vdwl_thr[tid] += evdwl;
-	eng_coul_thr[tid] += ecoul;
-      } else {
-	evdwlhalf = 0.5*evdwl;
-	ecoulhalf = 0.5*ecoul;
-	if (i < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-	if (j < nlocal) {
-	  eng_vdwl_thr[tid] += evdwlhalf;
-	  eng_coul_thr[tid] += ecoulhalf;
-	}
-      }
-    }
-    if (pair->eflag_atom) {
-      epairhalf = 0.5 * (evdwl + ecoul);
-      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
-      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
-    }
-  }
+  if (pair->eflag_either)
+    e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
 
   if (pair->vflag_either) {
+    double v[6];
     v[0] = delx*fx;
     v[1] = dely*fy;
     v[2] = delz*fz;
@@ -366,52 +547,7 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
     v[4] = delx*fz;
     v[5] = dely*fz;
 
-    if (pair->vflag_global) {
-      if (newton_pair) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
-      } else {
-	if (i < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-	if (j < nlocal) {
-	  virial_thr[tid][0] += 0.5*v[0];
-	  virial_thr[tid][1] += 0.5*v[1];
-	  virial_thr[tid][2] += 0.5*v[2];
-	  virial_thr[tid][3] += 0.5*v[3];
-	  virial_thr[tid][4] += 0.5*v[4];
-	  virial_thr[tid][5] += 0.5*v[5];
-	}
-      }
-    }
-
-    if (pair->vflag_atom) {
-      if (newton_pair || i < nlocal) {
-	vatom_thr[tid][i][0] += 0.5*v[0];
-	vatom_thr[tid][i][1] += 0.5*v[1];
-	vatom_thr[tid][i][2] += 0.5*v[2];
-	vatom_thr[tid][i][3] += 0.5*v[3];
-	vatom_thr[tid][i][4] += 0.5*v[4];
-	vatom_thr[tid][i][5] += 0.5*v[5];
-      }
-      if (newton_pair || j < nlocal) {
-	vatom_thr[tid][j][0] += 0.5*v[0];
-	vatom_thr[tid][j][1] += 0.5*v[1];
-	vatom_thr[tid][j][2] += 0.5*v[2];
-	vatom_thr[tid][j][3] += 0.5*v[3];
-	vatom_thr[tid][j][4] += 0.5*v[4];
-	vatom_thr[tid][j][5] += 0.5*v[5];
-      }
-    }
+    v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
   }
 }
 
@@ -421,25 +557,28 @@ void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
    virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double ecoul,
-			   double *fj, double *fk, double *drji, double *drki, int tid)
+void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k,
+			   const double evdwl, const double ecoul,
+			   const double * const fj, const double * const fk,
+			   const double * const drji, const double * const drki,
+			   ThrData * const thr)
 {
-  double epairthird,v[6];
-
   if (pair->eflag_either) {
     if (pair->eflag_global) {
-      eng_vdwl_thr[tid] += evdwl;
-      eng_coul_thr[tid] += ecoul;
+      thr->eng_vdwl += evdwl;
+      thr->eng_coul += ecoul;
     }
     if (pair->eflag_atom) {
-      epairthird = THIRD * (evdwl + ecoul);
-      eatom_thr[tid][i] += epairthird;
-      eatom_thr[tid][j] += epairthird;
-      eatom_thr[tid][k] += epairthird;
+      const double epairthird = THIRD * (evdwl + ecoul);
+      thr->eatom_pair[i] += epairthird;
+      thr->eatom_pair[j] += epairthird;
+      thr->eatom_pair[k] += epairthird;
     }
   }
 
   if (pair->vflag_either) {
+    double v[6];
+
     v[0] = drji[0]*fj[0] + drki[0]*fk[0];
     v[1] = drji[1]*fj[1] + drki[1]*fk[1];
     v[2] = drji[2]*fj[2] + drki[2]*fk[2];
@@ -447,21 +586,12 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double
     v[4] = drji[0]*fj[2] + drki[0]*fk[2];
     v[5] = drji[1]*fj[2] + drki[1]*fk[2];
       
-    if (pair->vflag_global) {
-      virial_thr[tid][0] += v[0];
-      virial_thr[tid][1] += v[1];
-      virial_thr[tid][2] += v[2];
-      virial_thr[tid][3] += v[3];
-      virial_thr[tid][4] += v[4];
-      virial_thr[tid][5] += v[5];
-    }
+    if (pair->vflag_global) v_tally(thr->virial_pair,v);
 
     if (pair->vflag_atom) {
-      for (int n=0; n < 6; ++n) {
-	vatom_thr[tid][i][n] += THIRD*v[n];
-	vatom_thr[tid][j][n] += THIRD*v[n];
-	vatom_thr[tid][k][n] += THIRD*v[n];
-      }
+      v_tally(thr->vatom_pair[i],THIRD,v);
+      v_tally(thr->vatom_pair[j],THIRD,v);
+      v_tally(thr->vatom_pair[k],THIRD,v);
     }
   }
 }
@@ -471,20 +601,23 @@ void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double
    called by AIREBO potential, newton_pair is always on
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
-			   double *fi, double *fj, double *fk,
-			   double *drim, double *drjm, double *drkm,int tid)
+void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j,
+			   const int k, const int m, const double evdwl,
+			   const double * const fi, const double * const fj,
+			   const double * const fk, const double * const drim,
+			   const double * const drjm, const double * const drkm,
+			   ThrData * const thr)
 {
-  double epairfourth,v[6];
+  double v[6];
 
   if (pair->eflag_either) {
-    if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl;
+    if (pair->eflag_global) thr->eng_vdwl += evdwl;
     if (pair->eflag_atom) {
-      epairfourth = 0.25 * evdwl;
-      eatom_thr[tid][i] += epairfourth;
-      eatom_thr[tid][j] += epairfourth;
-      eatom_thr[tid][k] += epairfourth;
-      eatom_thr[tid][m] += epairfourth;
+      const double epairfourth = 0.25 * evdwl;
+      thr->eatom_pair[i] += epairfourth;
+      thr->eatom_pair[j] += epairfourth;
+      thr->eatom_pair[k] += epairfourth;
+      thr->eatom_pair[m] += epairfourth;
     }
   }
 
@@ -496,14 +629,10 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
     v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
     v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
     
-    vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-    vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-    vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-    vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-    vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-    vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
-    vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2];
-    vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
+    v_tally(thr->vatom_pair[i],v);
+    v_tally(thr->vatom_pair[j],v);
+    v_tally(thr->vatom_pair[k],v);
+    v_tally(thr->vatom_pair[m],v);
   }
 }
 
@@ -513,48 +642,248 @@ void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
    changes v values by dividing by n
  ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid)
+void ThrOMP::ev_tally_list_thr(Pair * const pair, const int n,
+			       const int * const list, const double ecoul,
+			       const double * const v, ThrData * const thr)
 {
-  int i,j;
-
   if (pair->eflag_either) {
-    if (pair->eflag_global) eng_coul_thr[tid] += ecoul;
+    if (pair->eflag_global) thr->eng_coul += ecoul;
     if (pair->eflag_atom) {
-      double epairatom = ecoul/n;
-      for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom;
+      double epairatom = ecoul/static_cast<double>(n);  // equal share of ecoul for each of the n list entries
+      for (int i = 0; i < n; i++) thr->eatom_pair[list[i]] += epairatom;
     }
   }
 
   if (pair->vflag_either) {
-    if (pair->vflag_global) {
-      virial_thr[tid][0] += v[0];
-      virial_thr[tid][1] += v[1];
-      virial_thr[tid][2] += v[2];
-      virial_thr[tid][3] += v[3];
-      virial_thr[tid][4] += v[4];
-      virial_thr[tid][5] += v[5];
-    }
+    if (pair->vflag_global)
+      v_tally(thr->virial_pair,v);
 
     if (pair->vflag_atom) {
-      v[0] /= n;
-      v[1] /= n;
-      v[2] /= n;
-      v[3] /= n;
-      v[4] /= n;
-      v[5] /= n;
-      for (i = 0; i < n; i++) {
-	j = list[i];
-	vatom_thr[tid][j][0] += v[0];
-	vatom_thr[tid][j][1] += v[1];
-	vatom_thr[tid][j][2] += v[2];
-	vatom_thr[tid][j][3] += v[3];
-	vatom_thr[tid][j][4] += v[4];
-	vatom_thr[tid][j][5] += v[5];
+      const double s = 1.0/static_cast<double>(n);
+      double vtmp[6];
+      // scale into a local copy; v itself is const here and is not modified
+      vtmp[0] = s * v[0];
+      vtmp[1] = s * v[1];
+      vtmp[2] = s * v[2];
+      vtmp[3] = s * v[3];
+      vtmp[4] = s * v[4];
+      vtmp[5] = s * v[5];
+      // tally the scaled copy for every atom index stored in list
+      for (int i = 0; i < n; i++) {
+	const int j = list[i];
+	v_tally(thr->vatom_pair[j],vtmp);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
+			  const int newton_bond, const double ebond, const double fbond,
+			  const double delx, const double dely, const double delz,
+			  ThrData * const thr)
+{
+  // Tally the energy and virial of one bond between atoms i and j into thr.
+  // With newton_bond set both atoms are always tallied; otherwise an atom is
+  // only tallied when its index is below nlocal.
+  if (bond->eflag_either) {
+    const double ebondhalf = 0.5*ebond;
+    if (newton_bond) {
+      if (bond->eflag_global)
+	thr->eng_bond += ebond;
+      if (bond->eflag_atom) {
+	thr->eatom_bond[i] += ebondhalf;
+	thr->eatom_bond[j] += ebondhalf;
+      }
+    } else {
+      if (bond->eflag_global) {
+	if (i < nlocal) thr->eng_bond += ebondhalf;
+	if (j < nlocal) thr->eng_bond += ebondhalf;
+      }
+      if (bond->eflag_atom) {
+	if (i < nlocal) thr->eatom_bond[i] += ebondhalf;
+	if (j < nlocal) thr->eatom_bond[j] += ebondhalf;
+      }
+    }
+  }
+
+  if (bond->vflag_either) {
+    // virial components in the order xx, yy, zz, xy, xz, yz
+    double v[6];
+    v[0] = delx*delx*fbond;
+    v[1] = dely*dely*fbond;
+    v[2] = delz*delz*fbond;
+    v[3] = delx*dely*fbond;
+    v[4] = delx*delz*fbond;
+    v[5] = dely*delz*fbond;
+
+    if (bond->vflag_global) {
+      if (newton_bond)
+	v_tally(thr->virial_bond,v);
+      else {
+	if (i < nlocal)
+	  v_tally(thr->virial_bond,0.5,v);
+	if (j < nlocal)
+	  v_tally(thr->virial_bond,0.5,v);
+      }
+    }
+
+    if (bond->vflag_atom) {
+      // per-atom virial: halve v once, then tally it for each eligible atom
+      for (int n = 0; n < 6; ++n) v[n] *= 0.5;
+
+      if (newton_bond) {
+	v_tally(thr->vatom_bond[i],v);
+	v_tally(thr->vatom_bond[j],v);
+      } else {
+	// guard each atom by its own index: atom i is tested with i, not j
+	if (i < nlocal)
+	  v_tally(thr->vatom_bond[i],v);
+	if (j < nlocal)
+	  v_tally(thr->vatom_bond[j],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
+			  const int nlocal, const int newton_bond, const double eangle,
+			  const double * const f1, const double * const f3,
+			  const double delx1, const double dely1, const double delz1,
+			  const double delx2, const double dely2, const double delz2,
+			  ThrData * const thr)
+{
+  // Tally the energy and virial of one angle formed by atoms i, j and k into thr.
+  // With newton_bond set all three atoms are always tallied; otherwise an atom
+  // is only tallied when its index is below nlocal.
+  if (angle->eflag_either) {
+    const double eanglethird = THIRD*eangle;
+    if (newton_bond) {
+      if (angle->eflag_global)
+	thr->eng_angle += eangle;
+      if (angle->eflag_atom) {
+	thr->eatom_angle[i] += eanglethird;
+	thr->eatom_angle[j] += eanglethird;
+	thr->eatom_angle[k] += eanglethird;
       }
+    } else {
+      if (angle->eflag_global) {
+	if (i < nlocal) thr->eng_angle += eanglethird;
+	if (j < nlocal) thr->eng_angle += eanglethird;
+	if (k < nlocal) thr->eng_angle += eanglethird;
+      }
+      if (angle->eflag_atom) {
+	if (i < nlocal) thr->eatom_angle[i] += eanglethird;
+	if (j < nlocal) thr->eatom_angle[j] += eanglethird;
+	if (k < nlocal) thr->eatom_angle[k] += eanglethird;
+      }
+    }
+  }
+
+  if (angle->vflag_either) {
+    // virial components in the order xx, yy, zz, xy, xz, yz
+    double v[6];
+    v[0] = delx1*f1[0] + delx2*f3[0];
+    v[1] = dely1*f1[1] + dely2*f3[1];
+    v[2] = delz1*f1[2] + delz2*f3[2];
+    v[3] = delx1*f1[1] + delx2*f3[1];
+    v[4] = delx1*f1[2] + delx2*f3[2];
+    v[5] = dely1*f1[2] + dely2*f3[2];
+
+    if (angle->vflag_global) {
+      if (newton_bond) {
+	v_tally(thr->virial_angle,v);
+      } else {
+	int cnt = 0;
+	if (i < nlocal) ++cnt;
+	if (j < nlocal) ++cnt;
+	if (k < nlocal) ++cnt;
+	v_tally(thr->virial_angle,cnt*THIRD,v);
+      }
+    }
+
+    if (angle->vflag_atom) {
+      // per-atom virial: scale v to one third once, then tally it per atom
+      for (int n = 0; n < 6; ++n) v[n] *= THIRD;
+
+      if (newton_bond) {
+	v_tally(thr->vatom_angle[i],v);
+	v_tally(thr->vatom_angle[j],v);
+	v_tally(thr->vatom_angle[k],v);
+      } else {
+	// guard each atom by its own index: atom i is tested with i, not j
+	if (i < nlocal) v_tally(thr->vatom_angle[i],v);
+	if (j < nlocal) v_tally(thr->vatom_angle[j],v);
+	if (k < nlocal) v_tally(thr->vatom_angle[k],v);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial from 1-3 repulsion of SDK angle into accumulators
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
+			    const int nlocal, const int newton_bond,
+			    const double epair, const double fpair,
+			    const double delx, const double dely,
+			    const double delz, ThrData * const thr)
+{
+  // atoms i1 and i3 are each tallied when newton_bond is set or the atom's index is below nlocal
+  if (angle->eflag_either) {
+    const double epairhalf = 0.5 * epair;  // energy is split evenly between i1 and i3
+
+    if (angle->eflag_global) {
+      if (newton_bond || i1 < nlocal)
+	thr->eng_angle += epairhalf;
+      if (newton_bond || i3 < nlocal)
+	thr->eng_angle += epairhalf;
+    }
+
+    if (angle->eflag_atom) {
+      if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf;
+      if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf;
     }
   }
+  
+  if (angle->vflag_either) {
+    double v[6];  // virial components in the order xx, yy, zz, xy, xz, yz
+    v[0] = delx*delx*fpair;
+    v[1] = dely*dely*fpair;
+    v[2] = delz*delz*fpair;
+    v[3] = delx*dely*fpair;
+    v[4] = delx*delz*fpair;
+    v[5] = dely*delz*fpair;
+
+    if (angle->vflag_global) {
+      double * const va = thr->virial_angle;
+      if (newton_bond || i1 < nlocal) v_tally(va,0.5,v);
+      if (newton_bond || i3 < nlocal) v_tally(va,0.5,v);
+    }
+
+    if (angle->vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+	double * const va = thr->vatom_angle[i1];
+	v_tally(va,0.5,v);
+      }
+      if (newton_bond || i3 < nlocal) {
+	double * const va = thr->vatom_angle[i3];
+	v_tally(va,0.5,v);
+      }
+    }
+  }  
 }
 
+
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
@@ -562,40 +891,48 @@ void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, doubl
 	  = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
-void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
-			  int nlocal, int newton_bond,
-			  double edihedral, double *f1, double *f3, double *f4,
-			  double vb1x, double vb1y, double vb1z,
-			  double vb2x, double vb2y, double vb2z,
-			  double vb3x, double vb3y, double vb3z, int tid)
+void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2,
+			  const int i3, const int i4, const int nlocal,
+			  const int newton_bond, const double edihedral,
+			  const double * const f1, const double * const f3,
+			  const double * const f4, const double vb1x,
+			  const double vb1y, const double vb1z, const double vb2x,
+			  const double vb2y, const double vb2z, const double vb3x,
+			  const double vb3y, const double vb3z, ThrData * const thr)
 {
-  double edihedralquarter,v[6];
-  int cnt;
 
   if (dihed->eflag_either) {
     if (dihed->eflag_global) {
       if (newton_bond) {
-	eng_bond_thr[tid] += edihedral;
+	thr->eng_dihed += edihedral;
       } else {
-	edihedralquarter = 0.25*edihedral;
-	cnt = 0;
+	const double edihedralquarter = 0.25*edihedral;
+	int cnt = 0;
 	if (i1 < nlocal) ++cnt;
 	if (i2 < nlocal) ++cnt;
 	if (i3 < nlocal) ++cnt;
 	if (i4 < nlocal) ++cnt;
-	eng_bond_thr[tid] += static_cast<double>(cnt) * edihedralquarter;
+	thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter;
       }
     }
     if (dihed->eflag_atom) {
-      edihedralquarter = 0.25*edihedral;
-      if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter;
-      if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter;
-      if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter;
-      if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter;
+      const double edihedralquarter = 0.25*edihedral;
+      if (newton_bond) {
+	thr->eatom_dihed[i1] += edihedralquarter;
+	thr->eatom_dihed[i2] += edihedralquarter;
+	thr->eatom_dihed[i3] += edihedralquarter;
+	thr->eatom_dihed[i4] += edihedralquarter;
+      } else {
+	if (i1 < nlocal) thr->eatom_dihed[i1] +=  edihedralquarter;
+	if (i2 < nlocal) thr->eatom_dihed[i2] +=  edihedralquarter;
+	if (i3 < nlocal) thr->eatom_dihed[i3] +=  edihedralquarter;
+	if (i4 < nlocal) thr->eatom_dihed[i4] +=  edihedralquarter;
+      }
     }
   }
 
   if (dihed->vflag_either) {
+    double v[6];
     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
@@ -605,80 +942,127 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
 
     if (dihed->vflag_global) {
       if (newton_bond) {
-	virial_thr[tid][0] += v[0];
-	virial_thr[tid][1] += v[1];
-	virial_thr[tid][2] += v[2];
-	virial_thr[tid][3] += v[3];
-	virial_thr[tid][4] += v[4];
-	virial_thr[tid][5] += v[5];
+	v_tally(thr->virial_dihed,v);
       } else {
-	if (i1 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i2 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i3 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
-	if (i4 < nlocal) {
-	  virial_thr[tid][0] += 0.25*v[0];
-	  virial_thr[tid][1] += 0.25*v[1];
-	  virial_thr[tid][2] += 0.25*v[2];
-	  virial_thr[tid][3] += 0.25*v[3];
-	  virial_thr[tid][4] += 0.25*v[4];
-	  virial_thr[tid][5] += 0.25*v[5];
-	}
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v);
       }
     }
 
+    v[0] *= 0.25;
+    v[1] *= 0.25;
+    v[2] *= 0.25;
+    v[3] *= 0.25;
+    v[4] *= 0.25;
+    v[5] *= 0.25;
+    
     if (dihed->vflag_atom) {
-      if (newton_bond || i1 < nlocal) {
-	vatom_thr[tid][i1][0] += 0.25*v[0];
-	vatom_thr[tid][i1][1] += 0.25*v[1];
-	vatom_thr[tid][i1][2] += 0.25*v[2];
-	vatom_thr[tid][i1][3] += 0.25*v[3];
-	vatom_thr[tid][i1][4] += 0.25*v[4];
-	vatom_thr[tid][i1][5] += 0.25*v[5];
-      }
-      if (newton_bond || i2 < nlocal) {
-	vatom_thr[tid][i2][0] += 0.25*v[0];
-	vatom_thr[tid][i2][1] += 0.25*v[1];
-	vatom_thr[tid][i2][2] += 0.25*v[2];
-	vatom_thr[tid][i2][3] += 0.25*v[3];
-	vatom_thr[tid][i2][4] += 0.25*v[4];
-	vatom_thr[tid][i2][5] += 0.25*v[5];
+      if (newton_bond) {
+	v_tally(thr->vatom_dihed[i1],v);
+	v_tally(thr->vatom_dihed[i2],v);
+	v_tally(thr->vatom_dihed[i3],v);
+	v_tally(thr->vatom_dihed[i4],v);
+      } else {
+	if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v);
+	if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v);
+	if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v);
+	if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v);
       }
-      if (newton_bond || i3 < nlocal) {
-	vatom_thr[tid][i3][0] += 0.25*v[0];
-	vatom_thr[tid][i3][1] += 0.25*v[1];
-	vatom_thr[tid][i3][2] += 0.25*v[2];
-	vatom_thr[tid][i3][3] += 0.25*v[3];
-	vatom_thr[tid][i3][4] += 0.25*v[4];
-	vatom_thr[tid][i3][5] += 0.25*v[5];
-      }
-      if (newton_bond || i4 < nlocal) {
-	vatom_thr[tid][i4][0] += 0.25*v[0];
-	vatom_thr[tid][i4][1] += 0.25*v[1];
-	vatom_thr[tid][i4][2] += 0.25*v[2];
-	vatom_thr[tid][i4][3] += 0.25*v[3];
-	vatom_thr[tid][i4][4] += 0.25*v[4];
-	vatom_thr[tid][i4][5] += 0.25*v[5];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
+          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
+	  = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2,
+			  const int i3, const int i4, const int nlocal,
+			  const int newton_bond, const double eimproper,
+			  const double * const f1, const double * const f3,
+			  const double * const f4, const double vb1x,
+			  const double vb1y, const double vb1z, const double vb2x,
+			  const double vb2y, const double vb2z, const double vb3x,
+			  const double vb3y, const double vb3z, ThrData * const thr)
+{
+
+  if (imprp->eflag_either) {
+    if (imprp->eflag_global) {
+      if (newton_bond) {
+	thr->eng_imprp += eimproper;
+      } else {
+	const double eimproperquarter = 0.25*eimproper;
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter;
+      }
+    }
+    if (imprp->eflag_atom) {
+      const double eimproperquarter = 0.25*eimproper;
+      if (newton_bond) {
+	thr->eatom_imprp[i1] += eimproperquarter;
+	thr->eatom_imprp[i2] += eimproperquarter;
+	thr->eatom_imprp[i3] += eimproperquarter;
+	thr->eatom_imprp[i4] += eimproperquarter;
+      } else {
+	if (i1 < nlocal) thr->eatom_imprp[i1] +=  eimproperquarter;
+	if (i2 < nlocal) thr->eatom_imprp[i2] +=  eimproperquarter;
+	if (i3 < nlocal) thr->eatom_imprp[i3] +=  eimproperquarter;
+	if (i4 < nlocal) thr->eatom_imprp[i4] +=  eimproperquarter;
+      }
+    }
+  }
+
+  if (imprp->vflag_either) {
+    double v[6];
+    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
+    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
+    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
+    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
+    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
+    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
+
+    if (imprp->vflag_global) {
+      if (newton_bond) {
+	v_tally(thr->virial_imprp,v);
+      } else {
+	int cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v);
+      }
+    }
+
+    v[0] *= 0.25;
+    v[1] *= 0.25;
+    v[2] *= 0.25;
+    v[3] *= 0.25;
+    v[4] *= 0.25;
+    v[5] *= 0.25;
+    
+    if (imprp->vflag_atom) {
+      if (newton_bond) {
+	v_tally(thr->vatom_imprp[i1],v);
+	v_tally(thr->vatom_imprp[i2],v);
+	v_tally(thr->vatom_imprp[i3],v);
+	v_tally(thr->vatom_imprp[i4],v);
+      } else {
+	if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v);
+	if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v);
+	if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v);
+	if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v);
       }
     }
   }
@@ -690,7 +1074,8 @@ void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
    fpair is magnitude of force on atom I
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
+void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair,
+			  const double * const drij, ThrData * const thr)
 {
   double v[6];
   
@@ -701,10 +1086,8 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
   v[4] = 0.5 * drij[0]*drij[2]*fpair;
   v[5] = 0.5 * drij[1]*drij[2]*fpair;
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
+  v_tally(thr->vatom_pair[i],v);
+  v_tally(thr->vatom_pair[j],v);
 }
 
 /* ----------------------------------------------------------------------
@@ -712,8 +1095,10 @@ void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
    called by AIREBO and Tersoff potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
-			  double *drik, double *drjk, int tid)
+void ThrOMP::v_tally3_thr(const int i, const int j, const int k,
+			  const double * const fi, const double * const fj,
+			  const double * const drik, const double * const drjk,
+			  ThrData * const thr)
 {
   double v[6];
   
@@ -724,12 +1109,9 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
   v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]);
   v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]);
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-  vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-  vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
+  v_tally(thr->vatom_pair[i],v);
+  v_tally(thr->vatom_pair[j],v);
+  v_tally(thr->vatom_pair[k],v);
 }
 
 /* ----------------------------------------------------------------------
@@ -737,9 +1119,11 @@ void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
    called by AIREBO potential, newton_pair is always on
 ------------------------------------------------------------------------- */
 
-void ThrOMP::v_tally4_thr(int i, int j, int k, int m,
-			  double *fi, double *fj, double *fk,
-			  double *drim, double *drjm, double *drkm, int tid)
+void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m,
+			  const double * const fi, const double * const fj,
+			  const double * const fk, const double * const drim,
+			  const double * const drjm, const double * const drkm,
+			  ThrData * const thr)  // thr: its vatom_pair rows are handed to v_tally below
 {
   double v[6];
 
@@ -750,84 +1134,17 @@ void ThrOMP::v_tally4_thr(int i, int j, int k, int m,
   v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
   v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
 
-  vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
-  vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
-  vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
-  vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
-  vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
-  vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
-  vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2];
-  vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
-}
-
-/* ---------------------------------------------------------------------- */
-
-// set loop range thread id, and force array offset for threaded runs.
-double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid,
-				int inum, int nall, int nthreads)
-{
-#if defined(_OPENMP)
-  tid = omp_get_thread_num();
-
-  // each thread works on a fixed chunk of atoms.
-  const int idelta = 1 + inum/nthreads;
-  ifrom = tid*idelta;
-  ito   = ifrom + idelta;
-  if (ito > inum)
-    ito = inum;
-
-  return f + nall*tid;
-#else
-  tid = 0;
-  ifrom = 0;
-  ito = inum;
-  return f;
-#endif
-}
-
-/* ---------------------------------------------------------------------- */
-
-// reduce per thread data into the first part of the data
-// array that is used for the non-threaded parts and reset
-// the temporary storage to 0.0. this routine depends on
-// multi-dimensional arrays like force stored in this order
-// x1,y1,z1,x2,y2,z2,...
-// we need to post a barrier to wait until all threads are done
-// with writing to the array .
-void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads,
-			     int ndim, int tid)
-{
-#if defined(_OPENMP)
-  // NOOP in non-threaded execution.
-  if (nthreads == 1) return;
-#pragma omp barrier
-  {
-    const int nvals = ndim*nall;
-    const int idelta = nvals/nthreads + 1;
-    const int ifrom = tid*idelta;
-    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
-
-    for (int m = ifrom; m < ito; ++m) {
-      for (int n = 1; n < nthreads; ++n) {
-	dall[m] += dall[n*nvals + m];
-	dall[n*nvals + m] = 0.0;
-      }
-    }
-  }
-#else
-  // NOOP in non-threaded execution.
-  return;
-#endif
+  v_tally(thr->vatom_pair[i],v);  // the same v is passed once per atom: i, j, k and m
+  v_tally(thr->vatom_pair[j],v);
+  v_tally(thr->vatom_pair[k],v);
+  v_tally(thr->vatom_pair[m],v);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrOMP::memory_usage_thr() 
 {
-  const int nthreads=lmp->comm->nthreads;
-
-  double bytes = nthreads * (3 + 7) * sizeof(double);
-  bytes += nthreads * maxeatom_thr * sizeof(double);
-  bytes += nthreads * maxvatom_thr * 6 * sizeof(double);
+  double bytes=0.0;  // always reports 0.0: nothing is added to bytes before it is returned
+  
   return bytes;
 }
diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h
index 9966c9de00e85bd77e54e7034ec22deb992816d2..32f7045124b4a4dc75734c105d14c587a72a75ff 100644
--- a/src/USER-OMP/thr_omp.h
+++ b/src/USER-OMP/thr_omp.h
@@ -19,39 +19,28 @@
 #define LMP_THR_OMP_H
 
 #include "pointers.h"
+#include "fix_omp.h"
+#include "thr_data.h"
 
 namespace LAMMPS_NS {
 
 // forward declarations
 class Pair;
+class Bond;
+class Angle;
 class Dihedral;
+class Improper;
+class KSpace;
+class Fix;
 
 class ThrOMP {
- public:
-  struct global {
-    double eng_vdwl;
-    double eng_coul;
-    double eng_bond;
-    double virial[6];
-  };
 
  protected:
-  const int thr_style;
-  enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE};
-
-  LAMMPS *lmp;           // reference to base lammps object.
-
-  double *eng_vdwl_thr;  // per thread accumulated vdw energy
-  double *eng_coul_thr;  // per thread accumulated coulomb energies
-  double *eng_bond_thr;  // per thread accumlated bonded energy
+  LAMMPS *lmp; // pointer to the base LAMMPS object.
+  FixOMP *fix; // pointer to the FixOMP (fix_omp) object.
 
-  double **virial_thr;   // per thread virial
-  double **eatom_thr;    // per thread per atom energy
-  double ***vatom_thr;   // per thread per atom virial
+  const int thr_style; // presumably one or more of the THR_* flags declared below -- TODO confirm
 
-  int maxeatom_thr, maxvatom_thr;
-  int evflag_global, evflag_atom;
-  
  public:
   ThrOMP(LAMMPS *, int);
   virtual ~ThrOMP();
@@ -65,50 +54,105 @@ class ThrOMP {
       { ; }
     };
 
+  enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2,   // style flags: THR_NONE is 0 and
+	THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5,       // every other enumerator is a
+	THR_CHARMM=1<<6,THR_PROXY=1<<7,THR_HYBRID=1<<8,THR_FIX=1<<9}; // distinct single bit (1 .. 1<<9)
+
  protected:
-  // extra ev_tally work for threaded styles
-  void ev_setup_thr(Pair *);
-  void ev_setup_thr(Dihedral *);
+  // extra ev_tally setup work for threaded styles; takes plain values and a ThrData pointer, no style pointer
+  void ev_setup_thr(int, int, int, double *, double **, ThrData *);
 
-  void ev_reduce_thr(Pair *);
-  void ev_reduce_thr(Dihedral *);
+  // compute the per-thread contribution to the global virial from the per-thread forces
+  void virial_fdotr_compute_thr(double * const, const double * const * const, 
+				const double * const * const,
+				const int, const int, const int);
 
- private:
-  // internal method to be used by multiple ev_setup_thr() methods
-  void ev_setup_acc_thr(int, int, int, int, int, int);
+  // reduce per-thread data as needed; the style object is passed as an untyped (void *) pointer
+  void reduce_thr(void * const style, const int eflag, const int vflag, ThrData * const thr, const int nproxy=0);
 
 
  protected:
+
   // threading adapted versions of the ev_tally infrastructure
   // style specific versions (need access to style class flags)
-  void ev_tally_thr(Pair *, int, int, int, int, double, double,
-		    double, double, double, double, int);
-  void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double,
-			double, double, double, double, double, double, int);
-  void ev_tally3_thr(Pair *, int, int, int, double, double,
-		     double *, double *, double *, double *, int);
-  void ev_tally4_thr(Pair *, int, int, int, int, double, 
-		     double *, double *, double *,
-		     double *, double *, double *, int);
-  void ev_tally_list_thr(Pair *, int, int *, double , double *, int);
-
-  void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double,
-		    double *, double *, double *, double, double, double,
-		    double, double, double, double, double, double, int);
 
-  // style independent versions
-  void v_tally2_thr(int, int, double, double *, int);
-  void v_tally3_thr(int, int, int, double *, double *, double *, double *, int);
-  void v_tally4_thr(int, int, int, int, double *, double *, double *,
-		    double *, double *, double *, int);
+  // Pair
+  void e_tally_thr(Pair * const, const int, const int, const int,
+		   const int, const double, const double, ThrData * const);
+  void v_tally_thr(Pair * const, const int, const int, const int,
+		   const int, const double * const, ThrData * const);
+
+  void ev_tally_thr(Pair * const, const int, const int, const int, const int,
+		    const double, const double, const double, const double,
+		    const double, const double, ThrData * const);
+  void ev_tally_xyz_thr(Pair * const, const int, const int, const int,
+			const int, const double, const double, const double,
+			const double, const double, const double,
+			const double, const double, ThrData * const);
+  void ev_tally3_thr(Pair * const, const int, const int, const int, const double,
+		     const double, const double * const, const double * const,
+		     const double * const, const double * const, ThrData * const);
+  void ev_tally4_thr(Pair * const, const int, const int, const int, const int,
+		     const double, const double * const, const double * const,
+		     const double * const, const double * const, const double * const,
+		     const double * const, ThrData * const);
+
+  // Bond
+  void ev_tally_thr(Bond * const, const int, const int, const int, const int,
+		    const double, const double, const double, const double,
+		    const double, ThrData * const);
+
+  // Angle
+  void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int,
+		    const double, const double * const, const double * const,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const thr);
+  void ev_tally13_thr(Angle * const, const int, const int, const int, const int,
+		      const double, const double, const double, const double,
+		      const double, ThrData * const thr);
+
+  // Dihedral
+  void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int,
+		    const int, const double, const double * const, const double * const,
+		    const double * const, const double, const double, const double,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const);
+
+  // Improper
+  void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int,
+		    const int, const double, const double * const, const double * const,
+		    const double * const, const double, const double, const double,
+		    const double, const double, const double, const double, const double,
+		    const double, ThrData * const);
 
- protected:
-  // set loop range, thread id, and force array offset for threaded runs.
-  double **loop_setup_thr(double **, int &, int &, int &, int, int, int);
-
-  // reduce per thread data into the first part of the array
-  void data_reduce_thr(double *, int, int, int, int);
+  // style independent versions (the last entry, ev_tally_list_thr, still takes a Pair pointer)
+  void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const);
+  void v_tally3_thr(const int, const int, const int, const double * const, const double * const,
+		    const double * const, const double * const, ThrData * const);
+  void v_tally4_thr(const int, const int, const int, const int, const double * const,
+		    const double * const, const double * const, const double * const,
+		    const double * const, const double * const, ThrData * const);
+  void ev_tally_list_thr(Pair * const, const int, const int * const,
+			 const double , const double * const , ThrData * const);
 
 };
 
+// set the loop range (ifrom, ito) and the thread id (tid) for threaded runs; no force array offset is computed here.
+static inline void loop_setup_thr(int &ifrom, int &ito, int &tid,
+				  int inum, int nthreads, int nproxy=0)
+{
+#if defined(_OPENMP)
+  tid = omp_get_thread_num();
+
+  // each thread works on a fixed chunk of idelta atoms. NOTE(review): nthreads == nproxy divides by zero here.
+  const int idelta = 1 + inum/(nthreads-nproxy);
+  ifrom = (tid-nproxy)*idelta;  // NOTE(review): negative when tid < nproxy (for positive idelta)
+  ito   = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;  // upper bound is capped at inum
+#else
+  tid = 0;  // built without OpenMP: thread 0 gets the full range 0..inum
+  ifrom = 0;
+  ito = inum;
+#endif
+}
+
 }
 #endif