diff --git a/src/USER-OMP/fix_neigh_history_omp.cpp b/src/USER-OMP/fix_neigh_history_omp.cpp
index 05b8fa41b9cefc72355101ec6180fc086d095b0d..ecc3147ed5d0515dfb13d743531ed28599e11b40 100644
--- a/src/USER-OMP/fix_neigh_history_omp.cpp
+++ b/src/USER-OMP/fix_neigh_history_omp.cpp
@@ -21,6 +21,7 @@
 #include "force.h"
 #include "pair.h"
 #include "update.h"
+#include "memory.h"
 #include "modify.h"
 #include "error.h"
 
@@ -31,37 +32,48 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
+enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history
+
 
 FixNeighHistoryOMP::FixNeighHistoryOMP(class LAMMPS *lmp,int narg,char **argv)
   : FixNeighHistory(lmp,narg,argv) {
 
-  if (onesided)
-    error->all(FLERR,"tri/lj and line/lj are not supported by USER-OMP");
-  if (!newton_pair)
-    error->all(FLERR,"Newton off for granular is not supported by USER-OMP");
 }
 
 
 /* ----------------------------------------------------------------------
- copy partner info from neighbor data structs (NDS) to atom arrays
- should be called whenever NDS store current history info
-   and need to transfer the info to owned atoms
- e.g. when atoms migrate to new procs, new neigh list built, or between runs
-   when atoms may be added or deleted (NDS becomes out-of-date)
- the next post_neighbor() will put this info back into new NDS
- called during run before atom exchanges, including for restart files
- called at end of run via post_run()
- do not call during setup of run (setup_pre_exchange)
-   b/c there is no guarantee of a current NDS (even on continued run)
- if run command does a 2nd run with pre = no, then no neigh list
-   will be built, but old neigh list will still have the info
- 
- the USER-OMP version only supports newton on
+   copy partner info from neighbor data structs (NDS) to atom arrays
+   should be called whenever NDS store current history info
+     and need to transfer the info to owned atoms
+   e.g. when atoms migrate to new procs, new neigh list built, or between runs
+     when atoms may be added or deleted (NDS becomes out-of-date)
+   the next post_neighbor() will put this info back into new NDS
+   called during run before atom exchanges, including for restart files
+   called at end of run via post_run()
+   do not call during setup of run (setup_pre_exchange)
+     b/c there is no guarantee of a current NDS (even on continued run)
+   if run command does a 2nd run with pre = no, then no neigh list
+     will be built, but old neigh list will still have the info
+   onesided and newton on and newton off versions
 ------------------------------------------------------------------------- */
+// below is the pre_exchange() function from the parent class
+// void FixNeighHistory::pre_exchange()
+// {
+//  if (onesided) pre_exchange_onesided();
+//  else if (newton_pair) pre_exchange_newton();
+//  else pre_exchange_no_newton();
+//}
 
-void FixNeighHistoryOMP::pre_exchange()
+/* ----------------------------------------------------------------------
+   onesided version for sphere contact with line/tri particles
+   neighbor list has I = sphere, J = line/tri
+   only store history info with spheres
+------------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_onesided()
 {
   const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
   maxpartner = 0;
 
 #if defined(_OPENMP)
@@ -77,7 +89,131 @@ void FixNeighHistoryOMP::pre_exchange()
 
     int i,j,ii,jj,m,n,inum,jnum;
     int *ilist,*jlist,*numneigh,**firstneigh;
-    int *allflags,**firstflag;
+    int *allflags;
+    double *allvalues,*onevalues;
+
+    // NOTE: all operations until very end are with:
+    //   nlocal_neigh <= current nlocal
+    // b/c previous neigh list was built with nlocal_neigh
+    // nlocal can be larger if other fixes added atoms at this pre_exchange()
+
+    // clear per-thread paged data structures
+    
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // each thread works on a fixed chunk of local and ghost atoms.
+    const int ldelta = 1 + nlocal_neigh/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
+
+    // 1st loop over neighbor list, I = sphere, J = tri
+    // only calculate npartner for each owned spheres
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    tagint *tag = atom->tag;
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+
+      for (jj = 0; jj < jnum; jj++)
+        if (allflags[jj]) 
+          if ((i >= lfrom) && (i < lto)) npartner[i]++;
+    }
+
+    // get page chunks to store atom IDs and shear history for my atoms
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if ((i >= lfrom) && (i < lto)) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL)
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+      }
+    }
+
+    // 2nd loop over neighbor list
+    // store partner IDs and values for owned+ghost atoms
+    // re-zero npartner to use as counter
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+      allvalues = firstvalue[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          onevalues = &allvalues[dnum*jj];
+          j = jlist[jj];
+          j &= NEIGHMASK;
+
+          if ((i >= lfrom) && (i < lto)) {
+            m = npartner[i]++;
+            partner[i][m] = tag[j];
+            memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
+          }
+        }
+      }
+    }
+
+    // set maxpartner = max # of partners of any owned atom
+    maxpartner = m = 0;
+    for (i = lfrom; i < lto; i++)
+      m = MAX(m,npartner[i]);
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+    {
+      maxpartner = MAX(m,maxpartner);
+      comm->maxexchange_fix =MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+    }
+  }
+
+  // zero npartner values from previous nlocal_neigh to current nlocal
+  for (int i = nlocal_neigh; i < nlocal; ++i) npartner[i] = 0;
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_newton()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;
+  for (int i = 0; i < nall_neigh; i++) npartner[i] = 0;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,n,inum,jnum;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
     double *allvalues,*onevalues,*jvalues;
 
     MyPage <tagint> &ipg = ipage_atom[tid];
@@ -86,7 +222,7 @@ void FixNeighHistoryOMP::pre_exchange()
     dpg.reset();
 
     // 1st loop over neighbor list
-    // calculate npartner for each owned atom
+    // calculate npartner for each owned+ghost atom
 
     tagint *tag = atom->tag;
 
@@ -96,8 +232,178 @@ void FixNeighHistoryOMP::pre_exchange()
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
 
-    int nlocal_neigh = 0;
-    if (inum) nlocal_neigh = ilist[inum-1] + 1;
+    // each thread works on a fixed chunk of local and ghost atoms.
+    const int ldelta = 1 + nlocal_neigh/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > nlocal_neigh) ? nlocal_neigh : lmax;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          if ((i >= lfrom) && (i < lto))
+            npartner[i]++;
+
+          j = jlist[jj];
+          j &= NEIGHMASK;
+          if ((j >= lfrom) && (j < lto))
+            npartner[j]++;
+        }
+      }
+    }
+#if defined(_OPENMP)    
+#pragma omp barrier
+    {;}
+
+    // perform reverse comm to augment owned npartner counts with ghost counts
+
+#pragma omp master
+#endif
+    {
+      commflag = NPARTNER;
+      comm->reverse_comm_fix(this,0);
+    }
+
+    // get page chunks to store atom IDs and shear history for my atoms
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if ((i >= lfrom) && (i < lto)) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL)
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+      }
+    }
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    {
+      for (i = nlocal_neigh; i < nall_neigh; i++) {
+        n = npartner[i];
+        partner[i] = ipg.get(n);
+        valuepartner[i] = dpg.get(dnum*n);
+        if (partner[i] == NULL || valuepartner[i] == NULL) {
+          error->one(FLERR,"Neighbor history overflow, boost neigh_modify one");
+        }
+      }
+    }
+
+    // 2nd loop over neighbor list
+    // store partner IDs and values for owned+ghost atoms
+    // re-zero npartner to use as counter
+
+    for (i = lfrom; i < lto; i++) npartner[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      allflags = firstflag[i];
+      allvalues = firstvalue[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        if (allflags[jj]) {
+          onevalues = &allvalues[dnum*jj];
+          j = jlist[jj];
+          j &= NEIGHMASK;
+
+          if ((i >= lfrom) && (i < lto)) {
+            m = npartner[i]++;
+            partner[i][m] = tag[j];
+            memcpy(&valuepartner[i][dnum*m],onevalues,dnumbytes);
+          }
+
+          if ((j >= lfrom) && (j < lto)) {
+            m = npartner[j]++;
+            partner[j][m] = tag[i];
+            jvalues = &valuepartner[j][dnum*m];
+            for (n = 0; n < dnum; n++) jvalues[n] = -onevalues[n];
+          }
+        }
+      }
+    }
+#if defined(_OPENMP)    
+#pragma omp barrier
+    {;}
+
+#pragma omp master
+#endif
+    {
+      // perform reverse comm to augment
+      // owned atom partner/valuepartner with ghost info
+      // use variable variant b/c size of packed data can be arbitrarily large
+      //   if many touching neighbors for large particle
+
+      commflag = PERPARTNER;
+      comm->reverse_comm_fix_variable(this);
+    }
+
+    // set maxpartner = max # of partners of any owned atom
+    m = 0;
+    for (i = lfrom; i < lto; i++)
+      m = MAX(m,npartner[i]);
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+    {
+      maxpartner = MAX(m,maxpartner);
+      comm->maxexchange_fix = MAX(comm->maxexchange_fix,4*maxpartner+1);
+    }
+  }
+
+  // zero npartner values from previous nlocal_neigh to current nlocal
+
+  int nlocal = atom->nlocal;
+  for (int i = nlocal_neigh; i < nlocal; i++) npartner[i] = 0;
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::pre_exchange_no_newton()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,n,inum,jnum;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues,*onevalues,*jvalues;
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // 1st loop over neighbor list
+    // calculate npartner for each owned atom
+
+    tagint *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
 
     // each thread works on a fixed chunk of local and ghost atoms.
     const int ldelta = 1 + nlocal_neigh/nthreads;
@@ -158,7 +464,7 @@ void FixNeighHistoryOMP::pre_exchange()
 
       for (jj = 0; jj < jnum; jj++) {
         if (allflags[jj]) {
-          onevalues = &allvalues[3*jj];
+          onevalues = &allvalues[dnum*jj];
           j = jlist[jj];
           j &= NEIGHMASK;
 
@@ -179,13 +485,119 @@ void FixNeighHistoryOMP::pre_exchange()
     }
 
     // set maxpartner = max # of partners of any owned atom
-    maxpartner = m = 0;
+    m = 0;
     for (i = lfrom; i < lto; i++)
       m = MAX(m,npartner[i]);
 
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
-    maxpartner = MAX(m,maxpartner);
+    {
+      maxpartner = MAX(m,maxpartner);
+      comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+    }
+  }
+}
+
+/* -------------------------------------------------------------------- */
+
+void FixNeighHistoryOMP::post_neighbor()
+{
+  const int nthreads = comm->nthreads;
+  maxpartner = 0;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+  nlocal_neigh = nlocal;
+  nall_neigh = nall;
+
+  // realloc firstflag and firstvalue if needed
+
+  if (maxatom < nlocal) {
+    memory->sfree(firstflag);
+    memory->sfree(firstvalue);
+    maxatom = nall;
+    firstflag = (int **) 
+      memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag");
+    firstvalue = (double **) 
+      memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue");
+  }
+
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    int i,j,ii,jj,m,nn,np,inum,jnum,rflag;
+    tagint jtag;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *allflags;
+    double *allvalues;
+
+    MyPage <tagint> &ipg = ipage_atom[tid];
+    MyPage <double> &dpg = dpage_atom[tid];
+    ipg.reset();
+    dpg.reset();
+
+    // 1st loop over neighbor list
+    // calculate npartner for each owned atom
+
+    tagint *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    // each thread works on a fixed chunk of local and ghost atoms.
+    const int ldelta = 1 + inum/nthreads;
+    const int lfrom = tid*ldelta;
+    const int lmax = lfrom +ldelta;
+    const int lto = (lmax > inum) ? inum : lmax;
+
+    for (ii = lfrom; ii < lto; ii++) {
+      i = ilist[ii];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+      firstflag[i] = allflags = ipg.get(jnum);
+      firstvalue[i] = allvalues = dpg.get(jnum*dnum);
+      np = npartner[i];
+      nn = 0;
+
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        rflag = sbmask(j);
+        j &= NEIGHMASK;
+        jlist[jj] = j;
+        
+        // rflag = 1 if r < radsum in npair_size() method
+        // preserve neigh history info if tag[j] is in old-neigh partner list
+        // this test could be more geometrically precise for two sphere/line/tri
+
+        if (rflag) {
+          jtag = tag[j];
+          for (m = 0; m < np; m++)
+            if (partner[i][m] == jtag) break;
+          if (m < np) {
+            allflags[jj] = 1;
+            memcpy(&allvalues[nn],&valuepartner[i][dnum*m],dnumbytes);
+          } else {
+            allflags[jj] = 0;
+            memcpy(&allvalues[nn],zeroes,dnumbytes);
+          }
+        } else {
+          allflags[jj] = 0;
+          memcpy(&allvalues[nn],zeroes,dnumbytes);
+        }
+        nn += dnum;
+      }
+    }
   }
 }
diff --git a/src/USER-OMP/fix_neigh_history_omp.h b/src/USER-OMP/fix_neigh_history_omp.h
index 83472391a0ff52a7e3118699b6c70a70eddae58e..9cd97ce3da76db17ca49727d433028971790b550 100644
--- a/src/USER-OMP/fix_neigh_history_omp.h
+++ b/src/USER-OMP/fix_neigh_history_omp.h
@@ -28,7 +28,10 @@ class FixNeighHistoryOMP : public FixNeighHistory {
 
  public:
   FixNeighHistoryOMP(class LAMMPS *lmp, int narg, char **argv);
-  virtual void pre_exchange();
+  void pre_exchange_onesided();
+  void pre_exchange_newton();
+  void pre_exchange_no_newton();
+  void post_neighbor();
 };
 
 }
diff --git a/src/fix_neigh_history.cpp b/src/fix_neigh_history.cpp
index de94fbecefb034e8a97732ea5667bd98db6e4683..322c8d55619abd0ef17b0a7788523f978261e53b 100644
--- a/src/fix_neigh_history.cpp
+++ b/src/fix_neigh_history.cpp
@@ -29,7 +29,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-enum{DEFAULT,NPARTNER,PERPARTNER};
+enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history/omp
 
 /* ---------------------------------------------------------------------- */
 
@@ -554,8 +554,6 @@ void FixNeighHistory::post_neighbor()
 {
   int i,j,m,ii,jj,nn,np,inum,jnum,rflag;
   tagint jtag;
-  double xtmp,ytmp,ztmp,delx,dely,delz;
-  double radi,rsq,radsum;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int *allflags;
   double *allvalues;
diff --git a/src/fix_neigh_history.h b/src/fix_neigh_history.h
index 64cc11efec8622186f36d7bcbd9518f89d2e695e..7aed2d60357dfb81c0ff13ee357214f0c5dda4dc 100644
--- a/src/fix_neigh_history.h
+++ b/src/fix_neigh_history.h
@@ -38,9 +38,9 @@ class FixNeighHistory : public Fix {
   int setmask();
   void init();
   void setup_post_neighbor();
-  virtual void pre_exchange();
+  void pre_exchange();
   void min_pre_exchange();
-  void post_neighbor();
+  virtual void post_neighbor();
   void min_post_neighbor();
   void post_run();
 
@@ -87,9 +87,9 @@ class FixNeighHistory : public Fix {
   MyPage<int> *ipage_neigh;     // pages of local atom indices
   MyPage<double> *dpage_neigh;  // pages of partner values
 
-  void pre_exchange_onesided();
-  void pre_exchange_newton();
-  void pre_exchange_no_newton();
+  virtual void pre_exchange_onesided();
+  virtual void pre_exchange_newton();
+  virtual void pre_exchange_no_newton();
   void allocate_pages();
 
   inline int sbmask(int j) const {