diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..989bb08ef93f0d3bafc280ade46a123e9fcba672
--- /dev/null
+++ b/src/USER-OMP/angle_charmm_omp.cpp
@@ -0,0 +1,191 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_charmm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCharmmOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCharmmOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,tk;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double delxUB,delyUB,delzUB,rsqUB,rUB,dr,rk,forceUB;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // Urey-Bradley bond
+
+    delxUB = x[i3][0] - x[i1][0];
+    delyUB = x[i3][1] - x[i1][1];
+    delzUB = x[i3][2] - x[i1][2];
+    domain->minimum_image(delxUB,delyUB,delzUB);
+
+    rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB;
+    rUB = sqrt(rsqUB);
+
+    // Urey-Bradley force & energy
+
+    dr = rUB - r_ub[type];
+    rk = k_ub[type] * dr;
+
+    if (rUB > 0.0) forceUB = -2.0*rk/rUB;
+    else forceUB = 0.0;
+
+    if (EFLAG) eangle = rk*dr;
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // harmonic force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle += tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2 - delxUB*forceUB;
+    f1[1] = a11*dely1 + a12*dely2 - delyUB*forceUB;
+    f1[2] = a11*delz1 + a12*delz2 - delzUB*forceUB;
+
+    f3[0] = a22*delx2 + a12*delx1 + delxUB*forceUB;
+    f3[1] = a22*dely2 + a12*dely1 + delyUB*forceUB;
+    f3[2] = a22*delz2 + a12*delz1 + delzUB*forceUB;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_charmm_omp.h b/src/USER-OMP/angle_charmm_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..77ed6c74d7f8bc7525237b507a76c59cdfdb0b70
--- /dev/null
+++ b/src/USER-OMP/angle_charmm_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(charmm/omp,AngleCharmmOMP)
+
+#else
+
+#ifndef LMP_ANGLE_CHARMM_OMP_H
+#define LMP_ANGLE_CHARMM_OMP_H
+
+#include "angle_charmm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCharmmOMP : public AngleCharmm, public ThrOMP {
+
+ public:
+    AngleCharmmOMP(class LAMMPS *lmp) : 
+      AngleCharmm(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eb2cde294a06ccf29c2824368c2feabcb1bb21d2
--- /dev/null
+++ b/src/USER-OMP/angle_class2_omp.cpp
@@ -0,0 +1,234 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,dtheta2,dtheta3,dtheta4,de_angle;
+  double dr1,dr2,tk1,tk2,aa1,aa2,aa11,aa12,aa21,aa22;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22,b1,b2;
+  double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy for angle term
+
+    dtheta = acos(c) - theta0[type];
+    dtheta2 = dtheta*dtheta;
+    dtheta3 = dtheta2*dtheta;
+    dtheta4 = dtheta3*dtheta;
+
+    de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 + 
+      4.0*k4[type]*dtheta3;
+
+    a = -de_angle*s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    if (EFLAG) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4;
+
+    // force & energy for bond-bond term
+
+    dr1 = r1 - bb_r1[type];
+    dr2 = r2 - bb_r2[type];
+    tk1 = bb_k[type] * dr1;
+    tk2 = bb_k[type] * dr2;
+
+    f1[0] -= delx1*tk2/r1;
+    f1[1] -= dely1*tk2/r1;
+    f1[2] -= delz1*tk2/r1;
+
+    f3[0] -= delx2*tk1/r2;
+    f3[1] -= dely2*tk1/r2;
+    f3[2] -= delz2*tk1/r2;
+
+    if (EFLAG) eangle += bb_k[type]*dr1*dr2;
+
+    // force & energy for bond-angle term
+
+    aa1 = s * dr1 * ba_k1[type];
+    aa2 = s * dr2 * ba_k2[type];
+
+    aa11 = aa1 * c / rsq1;
+    aa12 = -aa1 / (r1 * r2);
+    aa21 = aa2 * c / rsq1;
+    aa22 = -aa2 / (r1 * r2);
+
+    vx11 = (aa11 * delx1) + (aa12 * delx2);
+    vx12 = (aa21 * delx1) + (aa22 * delx2);
+    vy11 = (aa11 * dely1) + (aa12 * dely2);
+    vy12 = (aa21 * dely1) + (aa22 * dely2);
+    vz11 = (aa11 * delz1) + (aa12 * delz2);
+    vz12 = (aa21 * delz1) + (aa22 * delz2);
+
+    aa11 = aa1 * c / rsq2;
+    aa21 = aa2 * c / rsq2;
+
+    vx21 = (aa11 * delx2) + (aa12 * delx1);
+    vx22 = (aa21 * delx2) + (aa22 * delx1);
+    vy21 = (aa11 * dely2) + (aa12 * dely1);
+    vy22 = (aa21 * dely2) + (aa22 * dely1);
+    vz21 = (aa11 * delz2) + (aa12 * delz1);
+    vz22 = (aa21 * delz2) + (aa22 * delz1);
+
+    b1 = ba_k1[type] * dtheta / r1;
+    b2 = ba_k2[type] * dtheta / r2;
+
+    f1[0] -= vx11 + b1*delx1 + vx12;
+    f1[1] -= vy11 + b1*dely1 + vy12;
+    f1[2] -= vz11 + b1*delz1 + vz12;
+
+    f3[0] -= vx21 + b2*delx2 + vx22;
+    f3[1] -= vy21 + b2*dely2 + vy22;
+    f3[2] -= vz21 + b2*delz2 + vz22;
+
+    if (EFLAG) eangle += ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_class2_omp.h b/src/USER-OMP/angle_class2_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..039d86b8afea15fe47eef6702ae0c37aaf8ff36a
--- /dev/null
+++ b/src/USER-OMP/angle_class2_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(class2/omp,AngleClass2OMP)
+
+#else
+
+#ifndef LMP_ANGLE_CLASS2_OMP_H
+#define LMP_ANGLE_CLASS2_OMP_H
+
+#include "angle_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleClass2OMP : public AngleClass2, public ThrOMP {
+
+ public:
+    AngleClass2OMP(class LAMMPS *lmp) : 
+      AngleClass2(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bcd4d56bf48ca5877c698d4d4ecaf50c0cddaa12
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_delta_omp.cpp
@@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_delta_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineDeltaOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineDeltaOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2,theta,dtheta,dcostheta,tk;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,a,cot,a11,a12,a22,b11,b12,b22,c0,s0,s;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    theta = acos(c);
+ 
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+   
+    cot = c/s;
+ 
+    // force & energy
+	
+    dtheta = theta - theta0[type]; 
+    dcostheta = cos(dtheta);
+    tk = k[type] * (1.0-dcostheta);
+
+    if (EFLAG) eangle = tk;
+
+    a = -k[type];
+
+    // expand dtheta for cos and sin contribution to force
+    
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+    
+    b11 = -a*c*cot / rsq1;
+    b12 = a*cot / (r1*r2);
+    b22 = -a*c*cot / rsq2;
+        
+    c0 = cos(theta0[type]);
+    s0 = sin(theta0[type]);
+
+    f1[0] = (a11*delx1 + a12*delx2)*c0 + (b11*delx1 + b12*delx2)*s0;
+    f1[1] = (a11*dely1 + a12*dely2)*c0 + (b11*dely1 + b12*dely2)*s0;
+    f1[2] = (a11*delz1 + a12*delz2)*c0 + (b11*delz1 + b12*delz2)*s0;
+    f3[0] = (a22*delx2 + a12*delx1)*c0 + (b22*delx2 + b12*delx1)*s0;
+    f3[1] = (a22*dely2 + a12*dely1)*c0 + (b22*dely2 + b12*dely1)*s0;
+    f3[2] = (a22*delz2 + a12*delz1)*c0 + (b22*delz2 + b12*delz1)*s0;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_delta_omp.h b/src/USER-OMP/angle_cosine_delta_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..86e96eb9004c5b4a4aca0a6e415ec92a657070aa
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_delta_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/delta/omp,AngleCosineDeltaOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_DELTA_OMP_H
+#define LMP_ANGLE_COSINE_DELTA_OMP_H
+
+#include "angle_cosine_delta.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineDeltaOMP : public AngleCosineDelta, public ThrOMP {
+
+ public:
+    AngleCosineDeltaOMP(class LAMMPS *lmp) : 
+      AngleCosineDelta(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e35fa5489a7fbd0c9b3377f2ea48ba2af9f825dd
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_omp.cpp
@@ -0,0 +1,160 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+
+    if (EFLAG) eangle = k[type]*(1.0+c);
+
+    a = k[type];
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_omp.h b/src/USER-OMP/angle_cosine_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..29b3e8b86bef644f52e48992fad1179a49c2bb27
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/omp,AngleCosineOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_OMP_H
+#define LMP_ANGLE_COSINE_OMP_H
+
+#include "angle_cosine.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineOMP : public AngleCosine, public ThrOMP {
+
+ public:
+    AngleCosineOMP(class LAMMPS *lmp) : 
+      AngleCosine(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d7578cac9fb907519718c67d619264650a76da4e
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp
@@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_periodic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosinePeriodicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i,i1,i2,i3,n,m,type,b_factor;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double tn,tn_1,tn_2,un,un_1,un_2;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    m = multiplicity[type];
+    b_factor = b[type];
+
+    // cos(n*x) = Tn(cos(x))
+    // Tn(x) = Chebyshev polynomials of the first kind: T_0 = 1, T_1 = x, ...
+    // recurrence relationship:
+    // Tn(x) = 2*x*T[n-1](x) - T[n-2](x) where T[-1](x) = 0
+    // also, dTn(x)/dx = n*U[n-1](x)
+    // where Un(x) = 2*x*U[n-1](x) - U[n-2](x) and U[-1](x) = 0
+    // finally need to handle special case for n = 1
+
+    tn = 1.0;
+    tn_1 = 1.0;
+    tn_2 = 0.0;
+    un = 1.0; 
+    un_1 = 2.0;
+    un_2 = 0.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy
+
+    tn_2 = c;
+    for (i = 1; i <= m; i++) {
+      tn = 2*c*tn_1 - tn_2;
+      tn_2 = tn_1;
+      tn_1 = tn;
+    }
+
+    for (i = 2; i <= m; i++) {
+      un = 2*c*un_1 - un_2;
+      un_2 = un_1;
+      un_1 = un;
+    }
+    tn = b_factor*pow(-1.0,m)*tn;
+    un = b_factor*pow(-1.0,m)*m*un;
+
+    if (EFLAG) eangle = 2*k[type]*(1.0 - tn);
+
+    a = -k[type]*un;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+        
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_periodic_omp.h b/src/USER-OMP/angle_cosine_periodic_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..63325ff8f51d9be2dc6e9f62dde8b306ec9a9f98
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_periodic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/periodic/omp,AngleCosinePeriodicOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_PERIODIC_OMP_H
+#define LMP_ANGLE_COSINE_PERIODIC_OMP_H
+
+#include "angle_cosine_periodic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosinePeriodicOMP : public AngleCosinePeriodic, public ThrOMP {
+
+ public:
+    AngleCosinePeriodicOMP(class LAMMPS *lmp) : 
+      AngleCosinePeriodic(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3959678c946075ba81e8a8a4f360668cec75ab22
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
@@ -0,0 +1,181 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_shift_exp_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineShiftExpOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3],ff;
+  double rsq1,rsq2,r1,r2,c,s,a11,a12,a22;
+  double exp2,aa,uumin,cccpsss,cssmscc;            
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // C= sine of angle
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    
+    // force & energy
+        
+    aa=a[type];
+    uumin=umin[type];
+
+    cccpsss = c*cost[type]+s*sint[type];
+    cssmscc = c*sint[type]-s*cost[type];
+
+    if (doExpansion[type])
+       {  //  |a|<0.01 so use expansions relative precision <1e-5
+//         std::cout << "Using expansion\n";
+            if (EFLAG) eangle = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin;
+            ff=0.25*uumin*cssmscc*(2+aa*cccpsss)/s;   
+       }
+     else
+       {
+//   std::cout << "Not using expansion\n";            
+            exp2=exp(0.5*aa*(1+cccpsss));
+            if (EFLAG) eangle = opt1[type]*(1-exp2);
+            ff=0.5*a[type]*opt1[type]*exp2*cssmscc/s;       
+       }
+
+    a11 =   ff*c/ rsq1;
+    a12 =  -ff  / (r1*r2);
+    a22 =   ff*c/ rsq2;      
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.h b/src/USER-OMP/angle_cosine_shift_exp_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..b9adbf104dade8f1f00d2b88714b2eec82a76f33
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_shift_exp_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/shift/exp/omp,AngleCosineShiftExpOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H
+#define LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H
+
+#include "angle_cosine_shift_exp.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineShiftExpOMP : public AngleCosineShiftExp, public ThrOMP {
+
+ public:
+    AngleCosineShiftExpOMP(class LAMMPS *lmp) : 
+      AngleCosineShiftExp(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5cb3e793a3593c1fa4584af7289049f27a4af934
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_shift_omp.cpp
@@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_shift_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineShiftOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineShiftOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,s,cps,kcos,ksin,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // C= sine of angle
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    
+    // force & energy
+    if (EFLAG) eangle = -k[type]-kcos*c-ksin*s;  
+
+    kcos=kcost[type];
+    ksin=ksint[type];
+    cps = c/s;          // NOTE absorbed one c
+
+    a11 = (-kcos +ksin*cps )*c/ rsq1;
+    a12 = ( kcos -ksin*cps )  / (r1*r2);
+    a22 = (-kcos +ksin*cps )*c/ rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_shift_omp.h b/src/USER-OMP/angle_cosine_shift_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..dadbd1cf534168903a9f1f3217f41c176d163122
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_shift_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/shift/omp,AngleCosineShiftOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SHIFT_OMP_H
+#define LMP_ANGLE_COSINE_SHIFT_OMP_H
+
+#include "angle_cosine_shift.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineShiftOMP : public AngleCosineShift, public ThrOMP {
+
+ public:
+    AngleCosineShiftOMP(class LAMMPS *lmp) : 
+      AngleCosineShift(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c39d0895992494b4e92b9213b35e23eb50bac35
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_squared_omp.cpp
@@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_squared_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineSquaredOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineSquaredOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dcostheta,tk;
+  double rsq1,rsq2,r1,r2,c,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    // force & energy
+	
+    dcostheta = c - cos(theta0[type]);
+    tk = k[type] * dcostheta;
+
+    if (EFLAG) eangle = tk*dcostheta;
+
+    a = 2.0 * tk;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_cosine_squared_omp.h b/src/USER-OMP/angle_cosine_squared_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..82c7c06e893930773fa24c2444aa7a4725eceed1
--- /dev/null
+++ b/src/USER-OMP/angle_cosine_squared_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/squared/omp,AngleCosineSquaredOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SQUARED_OMP_H
+#define LMP_ANGLE_COSINE_SQUARED_OMP_H
+
+#include "angle_cosine_squared.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineSquaredOMP : public AngleCosineSquared, public ThrOMP {
+
+ public:
+    AngleCosineSquaredOMP(class LAMMPS *lmp) : 
+      AngleCosineSquared(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..46f4654e33d144209816cec0dff2c87eece66176
--- /dev/null
+++ b/src/USER-OMP/angle_harmonic_omp.cpp
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,tk;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle = tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_harmonic_omp.h b/src/USER-OMP/angle_harmonic_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..3855726192aa30f15216fbc338755fa541507326
--- /dev/null
+++ b/src/USER-OMP/angle_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(harmonic/omp,AngleHarmonicOMP)
+
+#else
+
+#ifndef LMP_ANGLE_HARMONIC_OMP_H
+#define LMP_ANGLE_HARMONIC_OMP_H
+
+#include "angle_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleHarmonicOMP : public AngleHarmonic, public ThrOMP {
+
+ public:
+    AngleHarmonicOMP(class LAMMPS *lmp) : 
+      AngleHarmonic(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3b6c1fee7a43f491ac229f0f3a1623c0326002f6
--- /dev/null
+++ b/src/USER-OMP/angle_sdk_omp.cpp
@@ -0,0 +1,225 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_sdk_omp.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "math_const.h"
+
+#include <math.h>
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace LJSDKParms;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleSDKOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleSDKOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3;
+  double eangle,f1[3],f3[3],e13,f13;
+  double dtheta,tk;
+  double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // 1-3 LJ interaction. 
+    // we only want to use the repulsive part,
+    // and it can be scaled (or off).
+    // so this has to be done here and not in the
+    // general non-bonded code.
+
+    if (repflag) {
+      
+      delx3 = x[i1][0] - x[i3][0];
+      dely3 = x[i1][1] - x[i3][1];
+      delz3 = x[i1][2] - x[i3][2];
+      domain->minimum_image(delx3,dely3,delz3);
+      rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;
+
+      const int type1 = atom->type[i1];
+      const int type3 = atom->type[i3];
+      
+      f13=0.0;
+      e13=0.0;
+    
+      if (rsq3 < rminsq[type1][type3]) {
+	const int ljt = lj_type[type1][type3];
+	const double r2inv = 1.0/rsq3;
+
+	if (ljt == LJ12_4) {
+	  const double r4inv=r2inv*r2inv;
+
+	  f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]);
+	  
+	} else if (ljt == LJ9_6) {
+	  const double r3inv = r2inv*sqrt(r2inv);
+	  const double r6inv = r3inv*r3inv;
+
+	  f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]);
+
+	} else if (ljt == LJ12_6) {
+	  const double r6inv = r2inv*r2inv*r2inv;
+
+	  f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]);
+	}
+
+	// make sure energy is 0.0 at the cutoff.
+	if (EFLAG) e13 -= emin[type1][type3];
+
+	f13 *= r2inv;
+      }
+    }
+
+    // force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle = tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of the 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0] + f13*delx3;
+      f[i1][1] += f1[1] + f13*dely3;
+      f[i1][2] += f1[2] + f13*delz3;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0] - f13*delx3;
+      f[i3][1] += f3[1] - f13*dely3;
+      f[i3][2] += f3[2] - f13*delz3;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+    if (EVFLAG) ev_tally13_thr(this,i1,i3,nlocal,NEWTON_BOND,
+			       e13,f13,delx3,dely3,delz3,thr);
+
+  }
+}
diff --git a/src/USER-OMP/angle_sdk_omp.h b/src/USER-OMP/angle_sdk_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed9e1bc40c0168169dfc80619e9be061679ec087
--- /dev/null
+++ b/src/USER-OMP/angle_sdk_omp.h
@@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(sdk/omp,AngleSDKOMP)
+AngleStyle(cg/cmm/omp,AngleSDKOMP)
+
+#else
+
+#ifndef LMP_ANGLE_SDK_OMP_H
+#define LMP_ANGLE_SDK_OMP_H
+
+#include "angle_sdk.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleSDKOMP : public AngleSDK, public ThrOMP {
+
+ public:
+    AngleSDKOMP(class LAMMPS *lmp) : 
+      AngleSDK(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef37c00433635673ca858d5a956bc14da7e51916
--- /dev/null
+++ b/src/USER-OMP/angle_table_omp.cpp
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleTableOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleTableOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double eangle,f1[3],f3[3];
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double theta,u,mdu; //mdu: minus du, -du/dx=f
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // tabulated force & energy
+
+    theta = acos(c);
+    uf_lookup(type,theta,u,mdu);
+    
+    if (EFLAG) eangle = u;
+
+    a = mdu * s;               
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
diff --git a/src/USER-OMP/angle_table_omp.h b/src/USER-OMP/angle_table_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0bb9c7113fed5106c957ac3174a2f91736f8e42
--- /dev/null
+++ b/src/USER-OMP/angle_table_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(table/omp,AngleTableOMP)
+
+#else
+
+#ifndef LMP_ANGLE_TABLE_OMP_H
+#define LMP_ANGLE_TABLE_OMP_H
+
+#include "angle_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleTableOMP : public AngleTable, public ThrOMP {
+
+ public:
+    AngleTableOMP(class LAMMPS *lmp) : 
+      AngleTable(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..93c1dae04432285f8b4e33c6dc89e3ee574574df
--- /dev/null
+++ b/src/USER-OMP/bond_class2_omp.cpp
@@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,dr2,dr3,dr4,de_bond;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    dr2 = dr*dr;
+    dr3 = dr2*dr;
+    dr4 = dr3*dr;
+
+    // force & energy
+
+    de_bond = 2.0*k2[type]*dr + 3.0*k3[type]*dr2 + 4.0*k4[type]*dr3;
+    if (r > 0.0) fbond = -de_bond/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k2[type]*dr2 + k3[type]*dr3 + k4[type]*dr4;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_class2_omp.h b/src/USER-OMP/bond_class2_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..181ed4a44185c9c804bbcdd0e82175fb7d638d48
--- /dev/null
+++ b/src/USER-OMP/bond_class2_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(class2/omp,BondClass2OMP)
+
+#else
+
+#ifndef LMP_BOND_CLASS2_OMP_H
+#define LMP_BOND_CLASS2_OMP_H
+
+#include "bond_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondClass2OMP : public BondClass2, public ThrOMP {
+
+ public:
+    BondClass2OMP(class LAMMPS *lmp) : 
+      BondClass2(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f77e11d1df71aa0c169811ebe34fcf2ffb9f0104
--- /dev/null
+++ b/src/USER-OMP/bond_fene_expand_omp.cpp
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_fene_expand_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "error.h"
+#include "update.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondFENEExpandOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondFENEExpandOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r0sq,rlogarg,sr2,sr6;
+  double r,rshift,rshiftsq;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    rshift = r - shift[type];
+    rshiftsq = rshift*rshift;
+    r0sq = r0[type] * r0[type];
+    rlogarg = 1.0 - rshiftsq/r0sq;
+
+    // if r -> r0, then rlogarg < 0.0 which is an error
+    // issue a warning and reset rlogarg = epsilon
+    // if r > 2*r0 something serious is wrong, abort
+
+    if (rlogarg < 0.1) {
+      char str[128];
+      sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g",
+	      update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
+      error->warning(FLERR,str,0);
+      if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond");
+      rlogarg = 0.1;
+    }
+
+    fbond = -k[type]*rshift/rlogarg/r;
+
+    // force from LJ term
+
+    if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) {
+      sr2 = sigma[type]*sigma[type]/rshiftsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rshift/r;
+    }
+
+    // energy
+
+    if (EFLAG) {
+      ebond = -0.5 * k[type]*r0sq*log(rlogarg);
+      if (rshiftsq < TWO_1_3*sigma[type]*sigma[type])
+	ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type];
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_fene_expand_omp.h b/src/USER-OMP/bond_fene_expand_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..fd7fe4b04fac5c82e0528a577ce06c5808884f1f
--- /dev/null
+++ b/src/USER-OMP/bond_fene_expand_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(fene/expand/omp,BondFENEExpandOMP)
+
+#else
+
+#ifndef LMP_BOND_FENE_EXPAND_OMP_H
+#define LMP_BOND_FENE_EXPAND_OMP_H
+
+#include "bond_fene_expand.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondFENEExpandOMP : public BondFENEExpand, public ThrOMP {
+
+ public:
+    BondFENEExpandOMP(class LAMMPS *lmp) : 
+      BondFENEExpand(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2758ed9e9e1c2218066ae1284a1845c9d9a22862
--- /dev/null
+++ b/src/USER-OMP/bond_fene_omp.cpp
@@ -0,0 +1,146 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_fene_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "error.h"
+#include "update.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondFENEOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondFENEOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r0sq,rlogarg,sr2,sr6;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r0sq = r0[type] * r0[type];
+    rlogarg = 1.0 - rsq/r0sq;
+
+    // if r -> r0, then rlogarg < 0.0 which is an error
+    // issue a warning and reset rlogarg = epsilon
+    // if r > 2*r0 something serious is wrong, abort
+
+    if (rlogarg < 0.1) {
+      char str[128];
+      sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g",
+	      update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
+      error->warning(FLERR,str,0);
+      if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond");
+      rlogarg = 0.1;
+    }
+
+    fbond = -k[type]/rlogarg;
+
+    // force from LJ term
+
+    if (rsq < TWO_1_3*sigma[type]*sigma[type]) {
+      sr2 = sigma[type]*sigma[type]/rsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rsq;
+    }
+
+    // energy
+
+    if (EFLAG) {
+      ebond = -0.5 * k[type]*r0sq*log(rlogarg);
+      if (rsq < TWO_1_3*sigma[type]*sigma[type])
+	ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type];
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_fene_omp.h b/src/USER-OMP/bond_fene_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..b617aa136346e446794cf5d0885cf343cb3a515a
--- /dev/null
+++ b/src/USER-OMP/bond_fene_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(fene/omp,BondFENEOMP)
+
+#else
+
+#ifndef LMP_BOND_FENE_OMP_H
+#define LMP_BOND_FENE_OMP_H
+
+#include "bond_fene.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondFENEOMP : public BondFENE, public ThrOMP {
+
+ public:
+    BondFENEOMP(class LAMMPS *lmp) : 
+      BondFENE(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d29cce234fa8b4ba3c71a56f45c5af584b07bb90
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_omp.cpp
@@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = rk*dr;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_harmonic_omp.h b/src/USER-OMP/bond_harmonic_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..79c12aa12a292ef4483aa4a603a54e58d39391bf
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/omp,BondHarmonicOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_OMP_H
+#define LMP_BOND_HARMONIC_OMP_H
+
+#include "bond_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicOMP : public BondHarmonic, public ThrOMP {
+
+ public:
+    BondHarmonicOMP(class LAMMPS *lmp) : 
+      BondHarmonic(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..05f49ac6f834fc7365177c98caab3422e0e982d8
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
@@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_shift_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicShiftCutOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicShiftCutOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    if (r>r1[type]) continue;
+
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) );
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.h b/src/USER-OMP/bond_harmonic_shift_cut_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..e324da73ac827c7a07678fed7dc94b01a07ff789
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/shift/cut/omp,BondHarmonicShiftCutOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H
+#define LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H
+
+#include "bond_harmonic_shift_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicShiftCutOMP : public BondHarmonicShiftCut, public ThrOMP {
+
+ public:
+    BondHarmonicShiftCutOMP(class LAMMPS *lmp) : 
+      BondHarmonicShiftCut(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..917fff30befcaaba3ede6af7294c82fcd536f6f2
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp
@@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_shift_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicShiftOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicShiftOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) );
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_harmonic_shift_omp.h b/src/USER-OMP/bond_harmonic_shift_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5a45472a6de2d6abdd46b7150b07fbbaf2b924d
--- /dev/null
+++ b/src/USER-OMP/bond_harmonic_shift_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/shift/omp,BondHarmonicShiftOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_SHIFT_OMP_H
+#define LMP_BOND_HARMONIC_SHIFT_OMP_H
+
+#include "bond_harmonic_shift.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicShiftOMP : public BondHarmonicShift, public ThrOMP {
+
+ public:
+    BondHarmonicShiftOMP(class LAMMPS *lmp) : 
+      BondHarmonicShift(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b89507a65b7bf927907436c96348b4f4e2c78f9
--- /dev/null
+++ b/src/USER-OMP/bond_morse_omp.cpp
@@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_morse_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondMorseOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondMorseOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,ralpha;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    dr = r - r0[type];
+    ralpha = exp(-alpha[type]*dr);
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*d0[type]*alpha[type]*(1-ralpha)*ralpha/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = d0[type]*(1-ralpha)*(1-ralpha);
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_morse_omp.h b/src/USER-OMP/bond_morse_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..0deb731e2c5b0a9fa20d4c63b8c1427a90b540ea
--- /dev/null
+++ b/src/USER-OMP/bond_morse_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(morse/omp,BondMorseOMP)
+
+#else
+
+#ifndef LMP_BOND_MORSE_OMP_H
+#define LMP_BOND_MORSE_OMP_H
+
+#include "bond_morse.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondMorseOMP : public BondMorse, public ThrOMP {
+
+ public:
+    BondMorseOMP(class LAMMPS *lmp) : 
+      BondMorse(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..574022d86d4b89095d2d3f0006a9bd4c561dde2c
--- /dev/null
+++ b/src/USER-OMP/bond_nonlinear_omp.cpp
@@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_nonlinear_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondNonlinearOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondNonlinearOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,drsq,lamdasq,denom,denomsq;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    drsq = dr*dr;
+    lamdasq = lamda[type]*lamda[type];
+    denom = lamdasq - drsq;
+    denomsq = denom*denom;
+
+    // force & energy
+
+    fbond = -epsilon[type]/r * 2.0*dr*lamdasq/denomsq;
+    if (EFLAG) ebond = epsilon[type] * drsq / denom;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_nonlinear_omp.h b/src/USER-OMP/bond_nonlinear_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..a104a9af445dc1065bfd53215e7ad7f9370d2bdd
--- /dev/null
+++ b/src/USER-OMP/bond_nonlinear_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(nonlinear/omp,BondNonlinearOMP)
+
+#else
+
+#ifndef LMP_BOND_NONLINEAR_OMP_H
+#define LMP_BOND_NONLINEAR_OMP_H
+
+#include "bond_nonlinear.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondNonlinearOMP : public BondNonlinear, public ThrOMP {
+
+ public:
+    BondNonlinearOMP(class LAMMPS *lmp) : 
+      BondNonlinear(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..23b3bc3fbd25bf26a712184ab239eb2a3472f5be
--- /dev/null
+++ b/src/USER-OMP/bond_quartic_omp.cpp
@@ -0,0 +1,190 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_quartic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "pair.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondQuarticOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+ // insure pair->ev_tally() will use 1-4 virial contribution
+
+  if (vflag_global == 2)
+    force->pair->vflag_either = force->pair->vflag_global = 1;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondQuarticOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,m,type,itype,jtype;
+  double delx,dely,delz,ebond,fbond,evdwl,fpair;
+  double r,rsq,dr,r2,ra,rb,sr2,sr6;
+
+  ebond = evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int * const * const bondlist = neighbor->bondlist;
+  const double * const * const cutsq = force->pair->cutsq;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+
+    // skip bond if already broken
+
+    if (bondlist[n][2] <= 0) continue;
+
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+
+    // if bond breaks, set type to 0
+    //   both in temporary bondlist and permanent bond_type
+    // if this proc owns both atoms,
+    //   negate bond_type twice if other atom stores it
+    // if other proc owns 2nd atom, other proc will also break bond
+
+    if (rsq > rc[type]*rc[type]) {
+      bondlist[n][2] = 0;
+      for (m = 0; m < atom->num_bond[i1]; m++)
+	if (atom->bond_atom[i1][m] == atom->tag[i2])
+	  atom->bond_type[i1][m] = 0;
+      if (i2 < atom->nlocal)
+	for (m = 0; m < atom->num_bond[i2]; m++)
+	  if (atom->bond_atom[i2][m] == atom->tag[i1])
+	    atom->bond_type[i2][m] = 0;
+      continue;
+    }
+
+    // quartic bond
+    // 1st portion is from quartic term
+    // 2nd portion is from LJ term cut at 2^(1/6) with eps = sigma = 1.0
+
+    r = sqrt(rsq);
+    dr = r - rc[type];
+    r2 = dr*dr;
+    ra = dr - b1[type];
+    rb = dr - b2[type];
+    fbond = -k[type]/r * (r2*(ra+rb) + 2.0*dr*ra*rb);
+    
+    if (rsq < TWO_1_3) {
+      sr2 = 1.0/rsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*sr6*(sr6-0.5)/rsq;
+    }
+
+    if (EFLAG) {
+      ebond = k[type]*r2*ra*rb + u0[type];
+      if (rsq < TWO_1_3) ebond += 4.0*sr6*(sr6-1.0) + 1.0;
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,ebond,fbond,delx,dely,delz,thr);
+
+    // subtract out pairwise contribution from 2 atoms via pair->single()
+    // required since special_bond = 1,1,1
+    // tally energy/virial in pair, using newton_bond as newton flag
+
+    itype = atom->type[i1];
+    jtype = atom->type[i2];
+
+    if (rsq < cutsq[itype][jtype]) {
+      evdwl = -force->pair->single(i1,i2,itype,jtype,rsq,1.0,1.0,fpair);
+      fpair = -fpair;
+
+      if (NEWTON_BOND || i1 < nlocal) {
+	f[i1][0] += delx*fpair;
+	f[i1][1] += dely*fpair;
+	f[i1][2] += delz*fpair;
+      }
+      if (NEWTON_BOND || i2 < nlocal) {
+	f[i2][0] -= delx*fpair;
+	f[i2][1] -= dely*fpair;
+	f[i2][2] -= delz*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(force->pair,i1,i2,nlocal,NEWTON_BOND,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
diff --git a/src/USER-OMP/bond_quartic_omp.h b/src/USER-OMP/bond_quartic_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2dcec833be9e6debcb1ea2893e16bcee2f623e8
--- /dev/null
+++ b/src/USER-OMP/bond_quartic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(quartic/omp,BondQuarticOMP)
+
+#else
+
+#ifndef LMP_BOND_QUARTIC_OMP_H
+#define LMP_BOND_QUARTIC_OMP_H
+
+#include "bond_quartic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondQuarticOMP : public BondQuartic, public ThrOMP {
+
+ public:
+    BondQuarticOMP(class LAMMPS *lmp) : 
+      BondQuartic(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f822f01fa2483d3c2143f2857ef71b8792320fa9
--- /dev/null
+++ b/src/USER-OMP/bond_table_omp.cpp
@@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondTableOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondTableOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r;
+  double u,mdu;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    // force & energy
+
+    uf_lookup(type,r,u,mdu);
+    fbond = mdu/r;
+    ebond = u;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
diff --git a/src/USER-OMP/bond_table_omp.h b/src/USER-OMP/bond_table_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cf8022226de57dbcc50f274df2fa38852a0e829
--- /dev/null
+++ b/src/USER-OMP/bond_table_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(table/omp,BondTableOMP)
+
+#else
+
+#ifndef LMP_BOND_TABLE_OMP_H
+#define LMP_BOND_TABLE_OMP_H
+
+#include "bond_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondTableOMP : public BondTable, public ThrOMP {
+
+ public:
+    BondTableOMP(class LAMMPS *lmp) : 
+      BondTable(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea2c33d5d90ed046d8d04791812adc8c527929d7
--- /dev/null
+++ b/src/USER-OMP/ewald_omp.cpp
@@ -0,0 +1,386 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "ewald_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+
+#include <math.h>
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.00001
+
+/* ---------------------------------------------------------------------- */
+
+EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg) 
+  : Ewald(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
+{ }
+
+/* ---------------------------------------------------------------------- */
+void EwaldOMP::allocate()
+{
+  Ewald::allocate();
+
+  // always re-allocate for simplicity.
+  delete[] sfacrl;
+  delete[] sfacim;
+
+  sfacrl = new double[kmax3d*comm->nthreads];
+  sfacim = new double[kmax3d*comm->nthreads];
+}
+
+/* ----------------------------------------------------------------------
+   compute the Ewald long-range force, energy, virial 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::compute(int eflag, int vflag)
+{
+  // clear out global energy/virial
+
+  energy = 0.0;
+  if (vflag) for (int n = 0; n < 6; n++) virial[n] = 0.0;
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {
+    memory->destroy(ek);
+    memory->destroy3d_offset(cs,-kmax_created);
+    memory->destroy3d_offset(sn,-kmax_created);
+    nmax = atom->nmax;
+    memory->create(ek,nmax,3,"ewald:ek");
+    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;
+  }
+
+  // partial structure factors on each processor
+  // total structure factor by summing over procs
+
+  eik_dot_r(); 
+  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space portion of electric field
+  // double loop over K-vectors and local atoms
+
+  double * const * const f = atom->f;
+  const double * const q = atom->q;
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  const double qscale = force->qqrd2e * scale;
+
+  double eng_tmp = 0.0;
+  double v0,v1,v2,v3,v4,v5;
+  v0=v1=v2=v3=v4=v5=0.0;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5)
+#endif
+  {
+
+    int i,k,ifrom,ito,tid;
+    int kx,ky,kz;
+    double cypz,sypz,exprl,expim,partial;
+
+    loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr);
+
+    for (i = ifrom; i < ito; i++) {
+      ek[i][0] = 0.0;
+      ek[i][1] = 0.0;
+      ek[i][2] = 0.0;
+    }
+
+    for (k = 0; k < kcount; k++) {
+      kx = kxvecs[k];
+      ky = kyvecs[k];
+      kz = kzvecs[k];
+
+      for (i = ifrom; i < ito; i++) {
+	cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
+	sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+	exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
+	expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
+	partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
+	ek[i][0] += partial*eg[k][0];
+	ek[i][1] += partial*eg[k][1];
+	ek[i][2] += partial*eg[k][2];
+      }
+    }
+
+    // convert E-field to force
+
+    for (i = ifrom; i < ito; i++) {
+      const double fac = qscale*q[i];
+      f[i][0] += fac*ek[i][0];
+      f[i][1] += fac*ek[i][1];
+      f[i][2] += fac*ek[i][2];
+    }
+ 
+    // energy if requested
+
+    if (eflag) {
+#if defined(_OPENMP)
+#pragma omp for private(k)
+#endif
+      for (k = 0; k < kcount; k++)
+	eng_tmp += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + 
+			    sfacim_all[k]*sfacim_all[k]);
+    }
+
+    // virial if requested
+
+    if (vflag) {
+#if defined(_OPENMP)
+#pragma omp for private(k)
+#endif
+      for (k = 0; k < kcount; k++) {
+	double uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
+	v0 += uk*vg[k][0];
+	v1 += uk*vg[k][1];
+	v2 += uk*vg[k][2];
+	v3 += uk*vg[k][3];
+	v4 += uk*vg[k][4];
+	v5 += uk*vg[k][5];
+      }
+    }
+
+    reduce_thr(this, eflag,vflag,thr);
+  } // end of omp parallel region
+
+  if (eflag) {
+    eng_tmp -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy = eng_tmp * qscale;
+  }
+
+  if (vflag) {
+    virial[0] = v0 * qscale;
+    virial[1] = v1 * qscale;
+    virial[2] = v2 * qscale;
+    virial[3] = v3 * qscale;
+    virial[4] = v4 * qscale;
+    virial[5] = v5 * qscale;
+  }
+
+  if (slabflag) slabcorr(eflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void EwaldOMP::eik_dot_r()
+{
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int nlocal = atom->nlocal;
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int i,ifrom,ito,k,l,m,n,ic,tid;
+    double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
+    double sqk,clpm,slpm;
+    
+    loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
+    
+    double * const sfacrl_thr = sfacrl + tid*kmax3d;
+    double * const sfacim_thr = sfacim + tid*kmax3d;
+
+    n = 0;
+
+    // (k,0,0), (0,l,0), (0,0,m)
+
+    for (ic = 0; ic < 3; ic++) {
+      sqk = unitk[ic]*unitk[ic];
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	for (i = ifrom; i < ito; i++) {
+	  cs[0][ic][i] = 1.0;
+	  sn[0][ic][i] = 0.0;
+	  cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+	  sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+	  cs[-1][ic][i] = cs[1][ic][i];
+	  sn[-1][ic][i] = -sn[1][ic][i];
+	  cstr1 += q[i]*cs[1][ic][i];
+	  sstr1 += q[i]*sn[1][ic][i];
+	}
+	sfacrl_thr[n] = cstr1;
+	sfacim_thr[n++] = sstr1;
+      }
+    }
+
+    for (m = 2; m <= kmax; m++) {
+      for (ic = 0; ic < 3; ic++) {
+	sqk = m*unitk[ic] * m*unitk[ic];
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - 
+	      sn[m-1][ic][i]*sn[1][ic][i];
+	    sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + 
+	      cs[m-1][ic][i]*sn[1][ic][i];
+	    cs[-m][ic][i] = cs[m][ic][i];
+	    sn[-m][ic][i] = -sn[m][ic][i];
+	    cstr1 += q[i]*cs[m][ic][i];
+	    sstr1 += q[i]*sn[m][ic][i];
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	}
+      }
+    }
+
+    // 1 = (k,l,0), 2 = (k,-l,0)
+
+    for (k = 1; k <= kmax; k++) {
+      for (l = 1; l <= kmax; l++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+	    sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+	    cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+	    sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (0,l,m), 2 = (0,l,-m)
+
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+	    sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+	    cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+	    sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (k,0,m), 2 = (k,0,-m)
+
+    for (k = 1; k <= kmax; k++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+	    sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+	    cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+	    sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+
+    for (k = 1; k <= kmax; k++) {
+      for (l = 1; l <= kmax; l++) {
+	for (m = 1; m <= kmax; m++) {
+	  sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+	    (m*unitk[2] * m*unitk[2]);
+	  if (sqk <= gsqmx) {
+	    cstr1 = 0.0;
+	    sstr1 = 0.0;
+	    cstr2 = 0.0;
+	    sstr2 = 0.0;
+	    cstr3 = 0.0;
+	    sstr3 = 0.0;
+	    cstr4 = 0.0;
+	    sstr4 = 0.0;
+	    for (i = ifrom; i < ito; i++) {
+	      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	      slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	      cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	      slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	      cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	      slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	      cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	      slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	      cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    }
+	    sfacrl_thr[n] = cstr1;
+	    sfacim_thr[n++] = sstr1;
+	    sfacrl_thr[n] = cstr2;
+	    sfacim_thr[n++] = sstr2;
+	    sfacrl_thr[n] = cstr3;
+	    sfacim_thr[n++] = sstr3;
+	    sfacrl_thr[n] = cstr4;
+	    sfacim_thr[n++] = sstr4;
+	  }
+	}
+      }
+    }
+
+    sync_threads();
+    data_reduce_thr(sfacrl,kmax3d,nthreads,1,tid);
+    data_reduce_thr(sfacim,kmax3d,nthreads,1,tid);
+
+  } // end of parallel region
+}
diff --git a/src/USER-OMP/ewald_omp.h b/src/USER-OMP/ewald_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..515a74ed35eef2a07a4769fadf15e008586399da
--- /dev/null
+++ b/src/USER-OMP/ewald_omp.h
@@ -0,0 +1,42 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(ewald/omp,EwaldOMP)
+
+#else
+
+#ifndef LMP_EWALD_OMP_H
+#define LMP_EWALD_OMP_H
+
+#include "ewald.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class EwaldOMP : public Ewald, public ThrOMP {
+ public:
+  EwaldOMP(class LAMMPS *, int, char **);
+  virtual ~EwaldOMP() { };
+  virtual void allocate();
+  virtual void compute(int, int);
+
+ protected:
+  virtual void eik_dot_r();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..29c577c489ba46c8d53fbd83a04f018de316eede
--- /dev/null
+++ b/src/USER-OMP/fix_omp.cpp
@@ -0,0 +1,279 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   OpenMP based threading support for LAMMPS
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "min.h"
+
+#include "fix_omp.h"
+#include "thr_data.h"
+#include "thr_omp.h"
+
+#include "pair_hybrid.h"
+#include "bond_hybrid.h"
+#include "angle_hybrid.h"
+#include "dihedral_hybrid.h"
+#include "improper_hybrid.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+static int get_tid()
+{
+  int tid = 0;
+#if defined(_OPENMP)
+  tid = omp_get_thread_num();
+#endif
+  return tid;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg),
+  thr(NULL), last_omp_style(NULL), _nthr(-1), _neighbor(true), _newton(false)
+{
+  if ((narg < 4) || (narg > 6)) error->all(FLERR,"Illegal fix OMP command");
+  if (strcmp(arg[1],"all") != 0) error->all(FLERR,"Illegal fix OMP command");
+
+  int nthreads = 1;
+  if (narg > 3) {
+#if defined(_OPENMP)
+    if (strcmp(arg[3],"*") == 0)
+#pragma omp parallel default(none) shared(nthreads)
+      nthreads = omp_get_num_threads();
+    else
+      nthreads = atoi(arg[3]);
+#endif
+  }
+
+  if (nthreads < 1)
+    error->all(FLERR,"Illegal number of threads requested.");
+
+  if (nthreads != comm->nthreads) {
+#if defined(_OPENMP)
+    omp_set_num_threads(nthreads);
+#endif
+    comm->nthreads = nthreads;
+    if (comm->me == 0) {
+      if (screen)
+	fprintf(screen,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
+      if (logfile)
+	fprintf(logfile,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
+    }
+  }
+
+  if (narg > 4) {
+    if (strcmp(arg[4],"force/neigh") == 0)
+      _neighbor = true;
+    else if (strcmp(arg[4],"force") == 0)
+      _neighbor = false;
+    else
+      error->all(FLERR,"Illegal fix omp mode requested.");
+
+    if (comm->me == 0) {
+      const char * const mode = _neighbor ? "OpenMP capable" : "serial";
+      
+      if (screen)
+	fprintf(screen,"  using %s neighbor list subroutines\n", mode);
+      if (logfile)
+	fprintf(logfile,"  using %s neighbor list subroutines\n", mode);
+    }
+  }
+
+#if 0 /* to be enabled when we can switch between half and full neighbor lists */
+  if (narg > 5) {
+    if (strcmp(arg[5],"neigh/half") == 0)
+      _newton = true;
+    else if (strcmp(arg[5],"neigh/full") == 0)
+	_newton = false;
+    else
+      error->all(FLERR,"Illegal fix OMP command");
+
+    if (comm->me == 0) {
+      const char * const mode = _newton ? "half" : "full";
+      
+      if (screen)
+	fprintf(screen,"  using /omp styles with %s neighbor list builds\n", mode);
+      if (logfile)
+	fprintf(logfile,"  using /omp styles with %s neighbor list builds\n", mode);
+    }
+  }
+#endif
+
+  // allocate list for per thread accumulator manager class instances
+  // and then have each thread create an instance of this class to 
+  // encourage the OS to use storage that is "close" to each thread's CPU.
+  thr = new ThrData *[nthreads];
+  _nthr = nthreads;
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const int tid = get_tid();
+    thr[tid] = new ThrData(tid);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixOMP::~FixOMP()
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const int tid = get_tid();
+    delete thr[tid];
+  }
+  delete[] thr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixOMP::setmask()
+{
+  int mask = 0;
+  mask |= PRE_FORCE;
+  mask |= PRE_FORCE_RESPA;
+  mask |= MIN_PRE_FORCE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixOMP::init()
+{
+  if (strstr(update->integrate_style,"respa") != NULL)
+    error->all(FLERR,"Cannot use r-RESPA with /omp styles");
+
+  int check_hybrid;
+  last_omp_style = NULL;
+  char *last_omp_name = NULL;
+
+// determine which is the last force style with OpenMP
+// support as this is the one that has to reduce the forces
+
+#define CheckStyleForOMP(name)						\
+  check_hybrid = 0;							\
+  if (force->name) {							\
+    if ( (strcmp(force->name ## _style,"hybrid") == 0) ||		\
+         (strcmp(force->name ## _style,"hybrid/overlay") == 0) )	\
+      check_hybrid=1;							\
+    int len = strlen(force->name ## _style);				\
+    char *suffix = force->name ## _style + len - 4;			\
+    if (strcmp(suffix,"/omp") == 0) {					\
+      last_omp_name = force->name ## _style;				\
+      last_omp_style = (void *) force->name;				\
+    }									\
+  }
+
+#define CheckHybridForOMP(name,Class)					\
+  if (check_hybrid) {							\
+    Class ## Hybrid *style = (Class ## Hybrid *) force->name;		\
+    for (int i=0; i < style->nstyles; i++) {				\
+      int len = strlen(style->keywords[i]);				\
+      char *suffix = style->keywords[i] + len - 4;			\
+      if (strcmp(suffix,"/omp") == 0) {					\
+	last_omp_name = force->name ## _style;				\
+	last_omp_style = (void *) force->name;				\
+      }									\
+    }									\
+  }
+
+  CheckStyleForOMP(pair);
+  CheckHybridForOMP(pair,Pair);
+
+  CheckStyleForOMP(bond);
+  CheckHybridForOMP(bond,Bond);
+
+  CheckStyleForOMP(angle);
+  CheckHybridForOMP(angle,Angle);
+
+  CheckStyleForOMP(dihedral);
+  CheckHybridForOMP(dihedral,Dihedral);
+
+  CheckStyleForOMP(improper);
+  CheckHybridForOMP(improper,Improper);
+
+  CheckStyleForOMP(kspace);
+
+#undef CheckStyleForOMP
+#undef CheckHybridForOMP
+
+  set_neighbor_omp();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixOMP::set_neighbor_omp()
+{
+  // select or deselect multi-threaded neighbor
+  // list build depending on setting in package omp.
+  // NOTE: since we are at the top of the list of
+  // fixes, we cannot adjust neighbor lists from
+  // other fixes. those have to be re-implemented
+  // as /omp fix styles. :-(
+
+  const int neigh_omp = _neighbor ? 1 : 0;
+  const int nrequest = neighbor->nrequest;
+
+  for (int i = 0; i < nrequest; ++i)
+    neighbor->requests[i]->omp = neigh_omp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// adjust size and clear out per thread accumulator arrays
+void FixOMP::pre_force(int)
+{
+  const int nall = atom->nlocal + atom->nghost;
+
+  double **f = atom->f;
+  double **torque = atom->torque;
+  double *erforce = atom->erforce;
+  double *de = atom->de;
+  double *drho = atom->drho;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
+#endif
+  {
+    const int tid = get_tid();
+    thr[tid]->check_tid(tid);
+    thr[tid]->init_force(nall,f,torque,erforce,de,drho);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double FixOMP::memory_usage()
+{
+  double bytes = comm->nthreads * (sizeof(ThrData *) + sizeof(ThrData));
+  bytes += comm->nthreads * thr[0]->memory_usage();
+  
+  return bytes;
+}
diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..2167c281ec1787fb1a5f489fe91798427bbe2a6d
--- /dev/null
+++ b/src/USER-OMP/fix_omp.h
@@ -0,0 +1,71 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(OMP,FixOMP)
+
+#else
+
+#ifndef LMP_FIX_OMP_H
+#define LMP_FIX_OMP_H
+
+#include "fix.h"
+
+
+namespace LAMMPS_NS {
+
+class ThrData;
+
+class FixOMP : public Fix {
+  friend class ThrOMP;
+
+ public:
+  FixOMP(class LAMMPS *, int, char **);
+  virtual ~FixOMP();
+  virtual int setmask();
+  virtual void init();
+  virtual void pre_force(int);
+
+  virtual void setup_pre_force(int vflag)           { pre_force(vflag); };
+  virtual void min_setup_pre_force(int vflag)       { pre_force(vflag); };
+  virtual void min_pre_force(int vflag)             { pre_force(vflag); };
+  virtual void setup_pre_force_respa(int vflag,int) { pre_force(vflag); };
+  virtual void pre_force_respa(int vflag,int,int)   { pre_force(vflag); };
+
+  virtual double memory_usage();
+
+  ThrData *get_thr(int tid) { return thr[tid]; };
+  int get_nthr() const { return _nthr; }
+
+ protected:
+  ThrData **thr;
+  void *last_omp_style; // pointer to the style that needs
+						// to do the force reduction
+  
+ public:
+  bool get_neighbor() const {return _neighbor;};
+  bool get_newton() const   {return _newton;};
+
+ private:
+  int  _nthr;     // number of currently active ThrData object
+  bool _neighbor; // en/disable threads for neighbor list construction
+  bool _newton;   // en/disable newton's 3rd law for local atoms.
+
+  void set_neighbor_omp();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_peri_neigh_omp.cpp b/src/USER-OMP/fix_peri_neigh_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b8e07403a936201fd8aaeae951be55eac9de1c78
--- /dev/null
+++ b/src/USER-OMP/fix_peri_neigh_omp.cpp
@@ -0,0 +1,50 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Mike Parks (SNL)
+------------------------------------------------------------------------- */
+
+#include "fix_peri_neigh_omp.h"
+#include "fix_omp.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void FixPeriNeighOMP::init()
+{
+  if (!first) return;
+
+  // determine status of neighbor flag of the omp package command
+  int ifix = modify->find_fix("package_omp");
+  int use_omp = 0;
+  if (ifix >=0) {
+     FixOMP * fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
+     if (fix->get_neighbor()) use_omp = 1;
+  }
+
+  // need a full neighbor list once
+
+  int irequest = neighbor->request((void *) this);
+  neighbor->requests[irequest]->pair = 0;
+  neighbor->requests[irequest]->fix  = 1;
+  neighbor->requests[irequest]->half = 0; 
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->omp = use_omp;
+  neighbor->requests[irequest]->occasional = 1;
+}
+
diff --git a/src/USER-OMP/fix_peri_neigh_omp.h b/src/USER-OMP/fix_peri_neigh_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..605bbae5f44f1e791e813ea993fa3fd66ddaa247
--- /dev/null
+++ b/src/USER-OMP/fix_peri_neigh_omp.h
@@ -0,0 +1,37 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(PERI_NEIGH_OMP,FixPeriNeighOMP)
+
+#else
+
+#ifndef LMP_FIX_PERI_NEIGH_OMP_H
+#define LMP_FIX_PERI_NEIGH_OMP_H
+
+#include "fix_peri_neigh.h"
+
+namespace LAMMPS_NS {
+
+class FixPeriNeighOMP : public FixPeriNeigh {
+
+ public:
+  FixPeriNeighOMP(class LAMMPS *lmp, int narg, char **argv) : FixPeriNeigh(lmp,narg,argv) {};
+  virtual void init();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f0a477fef6454c04c9a31db9baa2fe5ef6a967cd
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_comb_omp.cpp
@@ -0,0 +1,160 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include <math.h>
+#include "fix_qeq_comb_omp.h"
+#include "atom.h"
+#include "force.h"
+#include "group.h"
+#include "memory.h"
+#include "error.h"
+#include "respa.h"
+#include "update.h"
+#include "pair_comb_omp.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) 
+  : FixQEQComb(lmp, narg, arg)
+{
+  if (narg < 5) error->all(FLERR,"Illegal fix qeq/comb/omp command");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEQCombOMP::init()
+{
+  if (!atom->q_flag)
+    error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q");
+
+  comb = (PairComb *) force->pair_match("comb/omp",1);
+  if (comb == NULL)
+    comb = (PairComb *) force->pair_match("comb",1);
+  if (comb == NULL) error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb");
+
+  if (strstr(update->integrate_style,"respa"))
+    nlevels_respa = ((Respa *) update->integrate)->nlevels;
+
+  ngroup = group->count(igroup);
+  if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEQCombOMP::post_force(int vflag)
+{
+  int i,iloop,loopmax;
+  double heatpq,qmass,dtq,dtq2;
+  double enegchkall,enegmaxall;
+
+  if (update->ntimestep % nevery) return;
+
+  // reallocate work arrays if necessary
+  // qf = charge force
+  // q1 = charge displacement
+  // q2 = tmp storage of charge force for next iteration
+
+  if (atom->nmax > nmax) {
+    memory->destroy(qf);
+    memory->destroy(q1);
+    memory->destroy(q2);
+    nmax = atom->nmax;
+    memory->create(qf,nmax,"qeq:qf");
+    memory->create(q1,nmax,"qeq:q1");
+    memory->create(q2,nmax,"qeq:q2");
+    vector_atom = qf;
+  }
+
+  // more loops for first-time charge equilibrium
+
+  iloop = 0; 
+  if (firstflag) loopmax = 5000;
+  else loopmax = 2000;
+
+  // charge-equilibration loop
+
+  if (me == 0 && fp)
+    fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n",
+	    update->ntimestep);
+
+  heatpq = 0.05;
+  qmass  = 0.000548580;
+  dtq    = 0.0006;
+  dtq2   = 0.5*dtq*dtq/qmass;
+
+  double enegchk = 0.0;
+  double enegtot = 0.0; 
+  double enegmax = 0.0;
+
+  double *q = atom->q;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++)
+    q1[i] = q2[i] = qf[i] = 0.0;
+
+  for (iloop = 0; iloop < loopmax; iloop ++ ) {
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+	q1[i] += qf[i]*dtq2 - heatpq*q1[i];
+	q[i]  += q1[i]; 
+      }
+
+    enegtot = comb->yasu_char(qf,igroup);
+    enegtot /= ngroup;
+    enegchk = enegmax = 0.0;
+
+    for (i = 0; i < nlocal ; i++)
+      if (mask[i] & groupbit) {
+	q2[i] = enegtot-qf[i];
+	enegmax = MAX(enegmax,fabs(q2[i]));
+	enegchk += fabs(q2[i]);
+	qf[i] = q2[i];
+      }
+
+    MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world);
+    enegchk = enegchkall/ngroup;
+    MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world);
+    enegmax = enegmaxall;
+
+    if (enegchk <= precision && enegmax <= 100.0*precision) break;
+
+    if (me == 0 && fp)
+      fprintf(fp,"  iteration: %d, enegtot %.6g, "
+	      "enegmax %.6g, fq deviation: %.6g\n",
+	      iloop,enegtot,enegmax,enegchk); 
+
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit)
+	q1[i] += qf[i]*dtq2 - heatpq*q1[i]; 
+  } 
+
+  if (me == 0 && fp) {
+    if (iloop == loopmax)
+      fprintf(fp,"Charges did not converge in %d iterations\n",iloop);
+    else
+      fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n",
+	      iloop,enegchk);
+  }
+}
+
diff --git a/src/USER-OMP/fix_qeq_comb_omp.h b/src/USER-OMP/fix_qeq_comb_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..0febe6b0aae9971f0805fd0ebd7ea93f472bce68
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_comb_omp.h
@@ -0,0 +1,32 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(qeq/comb/omp,FixQEQCombOMP)
+
+#else
+
+#ifndef LMP_FIX_QEQ_COMB_OMP_H
+#define LMP_FIX_QEQ_COMB_OMP_H
+
+#include "fix_qeq_comb.h"
+
+namespace LAMMPS_NS {
+
+class FixQEQCombOMP : public FixQEQComb {
+ public:
+  FixQEQCombOMP(class LAMMPS *, int, char **);
+  virtual void init();
+  virtual void post_force(int);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_wall_gran_omp.cpp b/src/USER-OMP/fix_wall_gran_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5857c921a65bd0132aa673890145db95484cdf17
--- /dev/null
+++ b/src/USER-OMP/fix_wall_gran_omp.cpp
@@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "fix_wall_gran_omp.h"
+#include "atom.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+
+enum{XPLANE,YPLANE,ZPLANE,ZCYLINDER};    // XYZ PLANE need to be 0,1,2
+enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY};
+
+#define BIG 1.0e20
+
+/* ---------------------------------------------------------------------- */
+
+FixWallGranOMP::FixWallGranOMP(LAMMPS *lmp, int narg, char **arg) :
+  FixWallGran(lmp, narg, arg) { }
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGranOMP::post_force(int vflag)
+{
+  double vwall[3];
+
+  // set position of wall to initial settings and velocity to 0.0
+  // if wiggle or shear, set wall position and velocity accordingly
+
+  double wlo = lo;
+  double whi = hi;
+  vwall[0] = vwall[1] = vwall[2] = 0.0;
+  if (wiggle) {
+    double arg = omega * (update->ntimestep - time_origin) * dt;
+    if (wallstyle == axis) {
+      wlo = lo + amplitude - amplitude*cos(arg);
+      whi = hi + amplitude - amplitude*cos(arg);
+    }
+    vwall[axis] = amplitude*omega*sin(arg);
+  } else if (wshear) vwall[axis] = vshear;
+
+  // loop over all my atoms
+  // rsq = distance from wall
+  // dx,dy,dz = signed distance from wall
+  // for rotating cylinder, reset vwall based on particle position
+  // skip atom if not close enough to wall
+  //   if wall was set to NULL, it's skipped since lo/hi are infinity
+  // compute force and torque on atom if close enough to wall
+  //   via wall potential matched to pair potential
+  // set shear if pair potential stores history
+
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  double * const * const torque = atom->torque;
+  double * const radius = atom->radius;
+  double * const rmass = atom->rmass;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  if (update->ntimestep > laststep) shearupdate = 1;
+  else shearupdate = 0;
+
+  int i;
+#if defined(_OPENMP)
+#pragma omp parallel for private(i) default(none) firstprivate(vwall,wlo,whi)
+#endif  
+  for (i = 0; i < nlocal; i++) {
+
+    if (mask[i] & groupbit) {
+      double dx,dy,dz,del1,del2,delxy,delr,rsq;
+
+      dx = dy = dz = 0.0;
+
+      if (wallstyle == XPLANE) {
+	del1 = x[i][0] - wlo;
+	del2 = whi - x[i][0];
+	if (del1 < del2) dx = del1;
+	else dx = -del2;
+      } else if (wallstyle == YPLANE) {
+	del1 = x[i][1] - wlo;
+	del2 = whi - x[i][1];
+	if (del1 < del2) dy = del1;
+	else dy = -del2;
+      } else if (wallstyle == ZPLANE) {
+	del1 = x[i][2] - wlo;
+	del2 = whi - x[i][2];
+	if (del1 < del2) dz = del1;
+	else dz = -del2;
+      } else if (wallstyle == ZCYLINDER) {
+        delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]);
+	delr = cylradius - delxy;
+	if (delr > radius[i]) dz = cylradius;
+	else {
+	  dx = -delr/delxy * x[i][0];
+	  dy = -delr/delxy * x[i][1];
+	  if (wshear && axis != 2) {
+	    vwall[0] = vshear * x[i][1]/delxy;
+	    vwall[1] = -vshear * x[i][0]/delxy;
+	    vwall[2] = 0.0;
+	  }
+	}
+      }
+
+      rsq = dx*dx + dy*dy + dz*dz;
+
+      if (rsq > radius[i]*radius[i]) {
+	if (pairstyle != HOOKE) {
+	  shear[i][0] = 0.0;
+	  shear[i][1] = 0.0;
+	  shear[i][2] = 0.0;
+	}
+      } else {
+	if (pairstyle == HOOKE)
+	  hooke(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+		radius[i],rmass[i]);
+	else if (pairstyle == HOOKE_HISTORY)
+	  hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+			radius[i],rmass[i],shear[i]);
+	else if (pairstyle == HERTZ_HISTORY)
+	  hertz_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+			radius[i],rmass[i],shear[i]);
+      }
+    }
+  }
+
+  laststep = update->ntimestep;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGranOMP::post_force_respa(int vflag, int ilevel, int iloop)
+{
+  if (ilevel == nlevels_respa-1) post_force(vflag);
+}
+
diff --git a/src/USER-OMP/fix_wall_gran_omp.h b/src/USER-OMP/fix_wall_gran_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..cfdfb9c75d90d5c71ba611f901ad47f619c18064
--- /dev/null
+++ b/src/USER-OMP/fix_wall_gran_omp.h
@@ -0,0 +1,38 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(wall/gran/omp,FixWallGranOMP)
+
+#else
+
+#ifndef LMP_FIX_WALL_GRAN_OMP_H
+#define LMP_FIX_WALL_GRAN_OMP_H
+
+#include "fix_wall_gran.h"
+
+namespace LAMMPS_NS {
+
+class FixWallGranOMP : public FixWallGran {
+
+ public:
+  FixWallGranOMP(class LAMMPS *, int, char **);
+  virtual void post_force(int);
+  virtual void post_force_respa(int, int, int);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cfadde435b50fa16d7e460aea561d57f58335aee
--- /dev/null
+++ b/src/USER-OMP/improper_class2_omp.cpp
@@ -0,0 +1,697 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <const int EVFLAG, const int EFLAG, const int NEWTON_BOND>
+void ImproperClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double eimproper;
+  double delr[3][3],rmag[3],rinvmag[3],rmag2[3];
+  double theta[3],costheta[3],sintheta[3];
+  double cossqtheta[3],sinsqtheta[3],invstheta[3];
+  double rABxrCB[3],rDBxrAB[3],rCBxrDB[3];
+  double ddelr[3][4],dr[3][4][3],dinvr[3][4][3];
+  double dthetadr[3][4][3],dinvsth[3][4][3];
+  double dinv3r[4][3],dinvs3r[3][4][3];
+  double drCBxrDB[3],rCBxdrDB[3],drDBxrAB[3],rDBxdrAB[3];
+  double drABxrCB[3],rABxdrCB[3];
+  double dot1,dot2,dd[3];
+  double fdot[3][4][3],ftmp,invs3r[3],inv3r;
+  double t,tt1,tt3,sc1;
+  double dotCBDBAB,dotDBABCB,dotABCBDB;
+  double chi,deltachi,d2chi,cossin2;
+  double drAB[3][4][3],drCB[3][4][3],drDB[3][4][3];
+  double dchi[3][4][3],dtotalchi[4][3];
+  double schiABCD,chiABCD,schiCBDA,chiCBDA,schiDBAC,chiDBAC;
+  double fabcd[4][3];
+
+  eimproper = 0.0;
+ 
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (i = 0; i < 3; i++)
+    for (j = 0; j < 4; j++)
+      for (k = 0; k < 3; k++) {
+	dthetadr[i][j][k] = 0.0;
+	drAB[i][j][k] = 0.0;
+	drCB[i][j][k] = 0.0;
+	drDB[i][j][k] = 0.0;
+      }
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    if (k0[type] == 0.0) continue;
+
+    // difference vectors
+
+    delr[0][0] = x[i1][0] - x[i2][0];
+    delr[0][1] = x[i1][1] - x[i2][1];
+    delr[0][2] = x[i1][2] - x[i2][2];
+    domain->minimum_image(delr[0]);
+
+    delr[1][0] = x[i3][0] - x[i2][0];
+    delr[1][1] = x[i3][1] - x[i2][1];
+    delr[1][2] = x[i3][2] - x[i2][2];
+    domain->minimum_image(delr[1]);
+
+    delr[2][0] = x[i4][0] - x[i2][0];
+    delr[2][1] = x[i4][1] - x[i2][1];
+    delr[2][2] = x[i4][2] - x[i2][2];
+    domain->minimum_image(delr[2]);
+
+    // bond lengths and associated values
+
+    for (i = 0; i < 3; i++) {
+      rmag2[i] = delr[i][0]*delr[i][0] + delr[i][1]*delr[i][1] + 
+	delr[i][2]*delr[i][2];
+      rmag[i] = sqrt(rmag2[i]);
+      rinvmag[i] = 1.0/rmag[i];
+    }
+
+    // angle ABC, CBD, ABD
+
+    costheta[0] = (delr[0][0]*delr[1][0] + delr[0][1]*delr[1][1] +  
+		   delr[0][2]*delr[1][2]) / (rmag[0]*rmag[1]);
+    costheta[1] = (delr[1][0]*delr[2][0] + delr[1][1]*delr[2][1] + 
+		   delr[1][2]*delr[2][2]) / (rmag[1]*rmag[2]);
+    costheta[2] = (delr[0][0]*delr[2][0] + delr[0][1]*delr[2][1] + 
+		   delr[0][2]*delr[2][2]) / (rmag[0]*rmag[2]);
+
+    // angle error check
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] == -1.0) {
+	int me = comm->me;
+	if (screen) {
+	  char str[128];
+	  sprintf(str,
+		  "Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		  me, thr->get_tid(),update->ntimestep,
+		  atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	  error->warning(FLERR,str,0);
+	  fprintf(screen,"  1st atom: %d %g %g %g\n",
+		  me,x[i1][0],x[i1][1],x[i1][2]);
+	  fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		  me,x[i2][0],x[i2][1],x[i2][2]);
+	  fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		  me,x[i3][0],x[i3][1],x[i3][2]);
+	  fprintf(screen,"  4th atom: %d %g %g %g\n",
+		  me,x[i4][0],x[i4][1],x[i4][2]);
+	}
+      }
+    }
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] > 1.0)  costheta[i] = 1.0;
+      if (costheta[i] < -1.0) costheta[i] = -1.0;
+      theta[i] = acos(costheta[i]);
+      cossqtheta[i] = costheta[i]*costheta[i];
+      sintheta[i] = sin(theta[i]);
+      invstheta[i] = 1.0/sintheta[i];
+      sinsqtheta[i] = sintheta[i]*sintheta[i];
+    }
+
+    // cross & dot products
+
+    cross(delr[0],delr[1],rABxrCB);
+    cross(delr[2],delr[0],rDBxrAB);
+    cross(delr[1],delr[2],rCBxrDB);
+
+    dotCBDBAB = dot(rCBxrDB,delr[0]);
+    dotDBABCB = dot(rDBxrAB,delr[1]);
+    dotABCBDB = dot(rABxrCB,delr[2]);
+
+    t = rmag[0] * rmag[1] * rmag[2];
+    inv3r = 1.0/t;
+    invs3r[0] = invstheta[1] * inv3r;
+    invs3r[1] = invstheta[2] * inv3r;
+    invs3r[2] = invstheta[0] * inv3r;
+
+    // chi ABCD, CBDA, DBAC
+    // final chi is average of three
+
+    schiABCD = dotCBDBAB * invs3r[0];
+    chiABCD = asin(schiABCD);
+    schiCBDA = dotDBABCB * invs3r[1];
+    chiCBDA = asin(schiCBDA);
+    schiDBAC = dotABCBDB * invs3r[2];
+    chiDBAC = asin(schiDBAC);
+
+    chi = (chiABCD + chiCBDA + chiDBAC) / 3.0;
+    deltachi = chi - chi0[type];
+    d2chi = deltachi * deltachi;
+
+    // energy
+
+    if (EFLAG) eimproper = k0[type]*d2chi;
+
+    // forces
+    // define d(delr)
+    // i = bond AB/CB/DB, j = atom A/B/C/D
+
+    ddelr[0][0] = 1.0;
+    ddelr[0][1] = -1.0;
+    ddelr[0][2] = 0.0;
+    ddelr[0][3] = 0.0;
+    ddelr[1][0] = 0.0;
+    ddelr[1][1] = -1.0;
+    ddelr[1][2] = 1.0;
+    ddelr[1][3] = 0.0;
+    ddelr[2][0] = 0.0;
+    ddelr[2][1] = -1.0;
+    ddelr[2][2] = 0.0;
+    ddelr[2][3] = 1.0;
+
+    // compute d(|r|)/dr and d(1/|r|)/dr for each direction, bond and atom
+    // define d(r) for each r
+    // i = bond AB/CB/DB, j = atom A/B/C/D, k = X/Y/Z
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++) {
+	  dr[i][j][k] = delr[i][k] * ddelr[i][j] / rmag[i];
+	  dinvr[i][j][k] = -dr[i][j][k] / rmag2[i];
+	}
+
+    // compute d(1 / (|r_AB| * |r_CB| * |r_DB|) / dr
+    // i = atom A/B/C/D, j = X/Y/Z
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	dinv3r[i][j] = rinvmag[1] * (rinvmag[2] * dinvr[0][i][j] +
+				     rinvmag[0] * dinvr[2][i][j]) +
+	  rinvmag[2] * rinvmag[0] * dinvr[1][i][j];
+
+    // compute d(theta)/d(r) for 3 angles
+    // angleABC
+
+    tt1 = costheta[0] / rmag2[0];
+    tt3 = costheta[0] / rmag2[1];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[0]);
+
+    dthetadr[0][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[1][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[1][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[1][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[1][0] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][0]) - 
+				(delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[1][1] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][1]) - 
+				(delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[1][2] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][2]) - 
+				(delr[0][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][0] = sc1 * ((tt3 * delr[1][0]) - 
+			       (delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][1] = sc1 * ((tt3 * delr[1][1]) - 
+			       (delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][2] = sc1 * ((tt3 * delr[1][2]) - 
+			       (delr[0][2] * rinvmag[0] * rinvmag[1]));
+
+    // angleCBD
+
+    tt1 = costheta[1] / rmag2[1];
+    tt3 = costheta[1] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[1]);
+
+    dthetadr[1][2][0] = sc1 * ((tt1 * delr[1][0]) - 
+			       (delr[2][0] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][1] = sc1 * ((tt1 * delr[1][1]) - 
+			       (delr[2][1] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][2] = sc1 * ((tt1 * delr[1][2]) - 
+			       (delr[2][2] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][1][0] = -sc1 * ((tt1 * delr[1][0]) - 
+				(delr[2][0] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][1] = -sc1 * ((tt1 * delr[1][1]) - 
+				(delr[2][1] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][2] = -sc1 * ((tt1 * delr[1][2]) - 
+				(delr[2][2] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[1][2] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[1][2] * rinvmag[2] * rinvmag[1]));
+
+    // angleABD
+
+    tt1 = costheta[2] / rmag2[0];
+    tt3 = costheta[2] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[2]);
+
+    dthetadr[2][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[2][0] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[2][1] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[2][2] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[2][0] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[2][1] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[2][2] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[0][2] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[0][2] * rinvmag[2] * rinvmag[0]));
+
+    // compute d( 1 / sin(theta))/dr
+    // i = angle, j = atom, k = direction
+
+    for (i = 0; i < 3; i++) {
+      cossin2 = -costheta[i] / sinsqtheta[i];
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dinvsth[i][j][k] = cossin2 * dthetadr[i][j][k];
+    }
+
+    // compute d(1 / sin(theta) * |r_AB| * |r_CB| * |r_DB|)/dr
+    // i = angle, j = atom
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	dinvs3r[0][i][j] = (invstheta[1] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[1][i][j]);
+	dinvs3r[1][i][j] = (invstheta[2] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[2][i][j]);
+	dinvs3r[2][i][j] = (invstheta[0] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[0][i][j]);
+      }
+
+    // drCB(i,j,k), etc
+    // i = vector X'/Y'/Z', j = atom A/B/C/D, k = direction X/Y/Z
+
+    for (i = 0; i < 3; i++) {
+      drCB[i][1][i] = -1.0;
+      drAB[i][1][i] = -1.0;
+      drDB[i][1][i] = -1.0;
+      drDB[i][3][i] = 1.0;
+      drCB[i][2][i] = 1.0;
+      drAB[i][0][i] = 1.0;
+    }
+
+    // d((r_CB x r_DB) dot r_AB)
+    // r_CB x d(r_DB)
+    // d(r_CB) x r_DB
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB)
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) dot r_AB
+    // d(r_AB) dot (r_CB x r_DB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[1],drDB[i][j],rCBxdrDB);
+	cross(drCB[i][j],delr[2],drCBxrDB);
+	for (k = 0; k < 3; k++) dd[k] = rCBxdrDB[k] + drCBxrDB[k];
+	dot1 = dot(dd,delr[0]);
+	dot2 = dot(rCBxrDB,drAB[i][j]);
+	fdot[0][j][i] = dot1 + dot2;
+      }
+
+    // d((r_DB x r_AB) dot r_CB)
+    // r_DB x d(r_AB)
+    // d(r_DB) x r_AB
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB)
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) dot r_CB
+    // d(r_CB) dot (r_DB x r_AB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[2],drAB[i][j],rDBxdrAB);
+	cross(drDB[i][j],delr[0],drDBxrAB);
+	for (k = 0; k < 3; k++) dd[k] = rDBxdrAB[k] + drDBxrAB[k];
+	dot1 = dot(dd,delr[1]);
+	dot2 = dot(rDBxrAB,drCB[i][j]);
+	fdot[1][j][i] = dot1 + dot2;
+      }
+
+    // d((r_AB x r_CB) dot r_DB)
+    // r_AB x d(r_CB)
+    // d(r_AB) x r_CB
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB)
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) dot r_DB
+    // d(r_DB) dot (r_AB x r_CB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[0],drCB[i][j],rABxdrCB);
+	cross(drAB[i][j],delr[1],drABxrCB);
+	for (k = 0; k < 3; k++) dd[k] = rABxdrCB[k] + drABxrCB[k];
+	dot1 = dot(dd,delr[2]);
+	dot2 = dot(rABxrCB,drDB[i][j]);
+	fdot[2][j][i] = dot1 + dot2;
+      }
+
+    // force on each atom
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	ftmp = (fdot[0][i][j] * invs3r[0]) + 
+	  (dinvs3r[0][i][j] * dotCBDBAB);
+	dchi[0][i][j] = ftmp / cos(chiABCD);
+	ftmp = (fdot[1][i][j] * invs3r[1]) + 
+	  (dinvs3r[1][i][j] * dotDBABCB);
+	dchi[1][i][j] = ftmp / cos(chiCBDA);
+	ftmp = (fdot[2][i][j] * invs3r[2]) + 
+	  (dinvs3r[2][i][j] * dotABCBDB);
+	dchi[2][i][j] = ftmp / cos(chiDBAC);
+	dtotalchi[i][j] = (dchi[0][i][j]+dchi[1][i][j]+dchi[2][i][j]) / 3.0;
+      }
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = -2.0*k0[type] * deltachi*dtotalchi[i][j];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,
+		   fabcd[0],fabcd[2],fabcd[3],
+		   delr[0][0],delr[0][1],delr[0][2],
+		   delr[1][0],delr[1][1],delr[1][2],
+		   delr[2][0]-delr[1][0],delr[2][1]-delr[1][1],
+		   delr[2][2]-delr[1][2],thr);
+  }
+
+  // compute angle-angle interactions
+  angleangle_thr<EVFLAG, EFLAG, NEWTON_BOND>(nfrom,nto,thr);
+}
+
+/* ----------------------------------------------------------------------
+   angle-angle interactions within improper
+------------------------------------------------------------------------- */
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperClass2OMP::angleangle_thr(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double eimproper;
+  double delxAB,delyAB,delzAB,rABmag2,rAB;
+  double delxBC,delyBC,delzBC,rBCmag2,rBC;
+  double delxBD,delyBD,delzBD,rBDmag2,rBD;
+  double costhABC,thetaABC,costhABD;
+  double thetaABD,costhCBD,thetaCBD,dthABC,dthCBD,dthABD;
+  double sc1,t1,t3,r12;
+  double dthetadr[3][4][3],fabcd[4][3];
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // difference vectors
+
+    delxAB = x[i1][0] - x[i2][0];
+    delyAB = x[i1][1] - x[i2][1];
+    delzAB = x[i1][2] - x[i2][2];
+    domain->minimum_image(delxAB,delyAB,delzAB);
+
+    delxBC = x[i3][0] - x[i2][0];
+    delyBC = x[i3][1] - x[i2][1];
+    delzBC = x[i3][2] - x[i2][2];
+    domain->minimum_image(delxBC,delyBC,delzBC);
+
+    delxBD = x[i4][0] - x[i2][0];
+    delyBD = x[i4][1] - x[i2][1];
+    delzBD = x[i4][2] - x[i2][2];
+    domain->minimum_image(delxBD,delyBD,delzBD);
+
+    // bond lengths
+
+    rABmag2 = delxAB*delxAB + delyAB*delyAB + delzAB*delzAB;
+    rAB = sqrt(rABmag2);
+    rBCmag2 = delxBC*delxBC + delyBC*delyBC + delzBC*delzBC;
+    rBC = sqrt(rBCmag2);
+    rBDmag2 = delxBD*delxBD + delyBD*delyBD + delzBD*delzBD;
+    rBD = sqrt(rBDmag2);
+        
+    // angle ABC, ABD, CBD
+
+    costhABC = (delxAB*delxBC + delyAB*delyBC + delzAB*delzBC) / (rAB * rBC);
+    if (costhABC > 1.0)  costhABC = 1.0;
+    if (costhABC < -1.0) costhABC = -1.0;
+    thetaABC = acos(costhABC);
+
+    costhABD = (delxAB*delxBD + delyAB*delyBD + delzAB*delzBD) / (rAB * rBD);
+    if (costhABD > 1.0)  costhABD = 1.0;
+    if (costhABD < -1.0) costhABD = -1.0;
+    thetaABD = acos(costhABD);
+
+    costhCBD = (delxBC*delxBD + delyBC*delyBD + delzBC*delzBD) /(rBC * rBD);
+    if (costhCBD > 1.0)  costhCBD = 1.0;
+    if (costhCBD < -1.0) costhCBD = -1.0;
+    thetaCBD = acos(costhCBD);
+
+    dthABC = thetaABC - aa_theta0_1[type];
+    dthABD = thetaABD - aa_theta0_2[type];
+    dthCBD = thetaCBD - aa_theta0_3[type];
+
+    // energy
+
+    if (EFLAG) eimproper = aa_k2[type] * dthABC * dthABD + 
+		 aa_k1[type] * dthABC * dthCBD +
+		 aa_k3[type] * dthABD * dthCBD;
+
+    // d(theta)/d(r) array
+    // angle i, atom j, coordinate k
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dthetadr[i][j][k] = 0.0;
+
+    // angle ABC
+
+    sc1 = sqrt(1.0/(1.0 - costhABC*costhABC));
+    t1 = costhABC / rABmag2;
+    t3 = costhABC / rBCmag2;
+    r12 = 1.0 / (rAB * rBC);
+
+    dthetadr[0][0][0] = sc1 * ((t1 * delxAB) - (delxBC * r12));
+    dthetadr[0][0][1] = sc1 * ((t1 * delyAB) - (delyBC * r12));
+    dthetadr[0][0][2] = sc1 * ((t1 * delzAB) - (delzBC * r12));
+    dthetadr[0][1][0] = sc1 * ((-t1 * delxAB) + (delxBC * r12) +
+			       (-t3 * delxBC) + (delxAB * r12));
+    dthetadr[0][1][1] = sc1 * ((-t1 * delyAB) + (delyBC * r12) +
+			       (-t3 * delyBC) + (delyAB * r12));
+    dthetadr[0][1][2] = sc1 * ((-t1 * delzAB) + (delzBC * r12) +
+			       (-t3 * delzBC) + (delzAB * r12));
+    dthetadr[0][2][0] = sc1 * ((t3 * delxBC) - (delxAB * r12));
+    dthetadr[0][2][1] = sc1 * ((t3 * delyBC) - (delyAB * r12));
+    dthetadr[0][2][2] = sc1 * ((t3 * delzBC) - (delzAB * r12));
+
+    // angle CBD
+
+    sc1 = sqrt(1.0/(1.0 - costhCBD*costhCBD));
+    t1 = costhCBD / rBCmag2;
+    t3 = costhCBD / rBDmag2;
+    r12 = 1.0 / (rBC * rBD);
+
+    dthetadr[1][2][0] = sc1 * ((t1 * delxBC) - (delxBD * r12));
+    dthetadr[1][2][1] = sc1 * ((t1 * delyBC) - (delyBD * r12));
+    dthetadr[1][2][2] = sc1 * ((t1 * delzBC) - (delzBD * r12));
+    dthetadr[1][1][0] = sc1 * ((-t1 * delxBC) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxBC * r12));
+    dthetadr[1][1][1] = sc1 * ((-t1 * delyBC) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyBC * r12));
+    dthetadr[1][1][2] = sc1 * ((-t1 * delzBC) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzBC * r12));
+    dthetadr[1][3][0] = sc1 * ((t3 * delxBD) - (delxBC * r12));
+    dthetadr[1][3][1] = sc1 * ((t3 * delyBD) - (delyBC * r12));
+    dthetadr[1][3][2] = sc1 * ((t3 * delzBD) - (delzBC * r12));
+
+    // angle ABD
+
+    sc1 = sqrt(1.0/(1.0 - costhABD*costhABD));
+    t1 = costhABD / rABmag2;
+    t3 = costhABD / rBDmag2;
+    r12 = 1.0 / (rAB * rBD);
+
+    dthetadr[2][0][0] = sc1 * ((t1 * delxAB) - (delxBD * r12));
+    dthetadr[2][0][1] = sc1 * ((t1 * delyAB) - (delyBD * r12));
+    dthetadr[2][0][2] = sc1 * ((t1 * delzAB) - (delzBD * r12));
+    dthetadr[2][1][0] = sc1 * ((-t1 * delxAB) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxAB * r12));
+    dthetadr[2][1][1] = sc1 * ((-t1 * delyAB) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyAB * r12));
+    dthetadr[2][1][2] = sc1 * ((-t1 * delzAB) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzAB * r12));
+    dthetadr[2][3][0] = sc1 * ((t3 * delxBD) - (delxAB * r12));
+    dthetadr[2][3][1] = sc1 * ((t3 * delyBD) - (delyAB * r12));
+    dthetadr[2][3][2] = sc1 * ((t3 * delzBD) - (delzAB * r12));
+
+    // angleangle forces
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = - 
+	  ((aa_k1[type] * 
+	    (dthABC*dthetadr[1][i][j] + dthCBD*dthetadr[0][i][j])) +
+	   (aa_k2[type] * 
+	    (dthABC*dthetadr[2][i][j] + dthABD*dthetadr[0][i][j])) +
+	   (aa_k3[type] *
+	    (dthABD*dthetadr[1][i][j] + dthCBD*dthetadr[2][i][j])));
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,
+		   fabcd[0],fabcd[2],fabcd[3],delxAB,delyAB,delzAB,
+		   delxBC,delyBC,delzBC,delxBD,delyBD,delzBD,thr);
+  }
+}
diff --git a/src/USER-OMP/improper_class2_omp.h b/src/USER-OMP/improper_class2_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..130a680d064043e72ff4163bda03de81f1711227
--- /dev/null
+++ b/src/USER-OMP/improper_class2_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(class2/omp,ImproperClass2OMP)
+
+#else
+
+#ifndef LMP_IMPROPER_CLASS2_OMP_H
+#define LMP_IMPROPER_CLASS2_OMP_H
+
+#include "improper_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperClass2OMP : public ImproperClass2, public ThrOMP {
+
+ public:
+    ImproperClass2OMP(class LAMMPS *lmp) : 
+      ImproperClass2(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void angleangle_thr(int, int, ThrData * const thr);
+  
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..06f3186e06d6e7062be69866af4221cfba2ee992
--- /dev/null
+++ b/src/USER-OMP/improper_cvff_omp.cpp
@@ -0,0 +1,295 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_cvff_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperCvffOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperCvffOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s2,s12,c,p,pd,rc2,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+        
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+        
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+        
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle
+        
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sc1 = sqrt(1.0 - c1mag*c1mag);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sc2 = sqrt(1.0 - c2mag*c2mag);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+    // p = 1 + cos(n*phi) for d = 1
+    // p = 1 - cos(n*phi) for d = -1
+    // pd = dp/dc / 2
+        
+    m = multiplicity[type];
+
+    if (m == 2) {
+      p = 2.0*c*c;
+      pd = 2.0*c;
+    } else if (m == 3) {
+      rc2 = c*c;
+      p = (4.0*rc2-3.0)*c + 1.0;
+      pd = 6.0*rc2 - 1.5;
+    } else if (m == 4) {
+      rc2 = c*c;
+      p = 8.0*(rc2-1)*rc2 + 2.0;
+      pd = (16.0*rc2-8.0)*c;
+    } else if (m == 6) {
+      rc2 = c*c;
+      p = ((32.0*rc2-48.0)*rc2 + 18.0)*rc2;
+      pd = (96.0*(rc2-1.0)*rc2 + 18.0)*c;
+    } else if (m == 1) {
+      p = c + 1.0;
+      pd = 0.5;
+    } else if (m == 5) {
+      rc2 = c*c;
+      p = ((16.0*rc2-20.0)*rc2 + 5.0)*c + 1.0;
+      pd = (40.0*rc2-30.0)*rc2 + 2.5;
+    } else if (m == 0) {
+      p = 2.0;
+      pd = 0.0;
+    }
+    
+    if (sign[type] == -1) {
+      p = 2.0 - p;
+      pd = -pd;
+    }
+
+    if (EFLAG) eimproper = k[type]*p;
+
+    a = 2.0 * k[type] * pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2*(2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1*(c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2*(c2mag*c*s2 + c1mag*s12);
+
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
diff --git a/src/USER-OMP/improper_cvff_omp.h b/src/USER-OMP/improper_cvff_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..f058b10eed03c051a728df8184fe840edcfdc1d0
--- /dev/null
+++ b/src/USER-OMP/improper_cvff_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(cvff/omp,ImproperCvffOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_CVFF_OMP_H
+#define LMP_IMPROPER_CVFF_OMP_H
+
+#include "improper_cvff.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperCvffOMP : public ImproperCvff, public ThrOMP {
+
+ public:
+    ImproperCvffOMP(class LAMMPS *lmp) : 
+      ImproperCvff(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..44e45a977dd082b3d912c6553293993d81803249
--- /dev/null
+++ b/src/USER-OMP/improper_harmonic_omp.cpp
@@ -0,0 +1,236 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double ss1,ss2,ss3,r1,r2,r3,c0,c1,c2,s1,s2;
+  double s12,c,s,domega,a,a11,a22,a33,a12,a13,a23;
+  double sx2,sy2,sz2;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // geometry of 4-body
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    ss1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    ss2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    ss3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+        
+    r1 = sqrt(ss1);
+    r2 = sqrt(ss2);
+    r3 = sqrt(ss3);
+        
+    // sin and cos of angle
+        
+    c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * r1 * r3;
+    c1 = (vb1x * vb2x + vb1y * vb2y + vb1z * vb2z) * r1 * r2;
+    c2 = -(vb3x * vb2x + vb3y * vb2y + vb3z * vb2z) * r3 * r2;
+
+    s1 = 1.0 - c1*c1;
+    if (s1 < SMALL) s1 = SMALL;
+    s1 = 1.0 / s1;
+
+    s2 = 1.0 - c2*c2;
+    if (s2 < SMALL) s2 = SMALL;
+    s2 = 1.0 / s2;
+
+    s12 = sqrt(s1*s2);
+    c = (c1*c2 + c0) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+
+    // force & energy
+
+    domega = acos(c) - chi[type];
+    a = k[type] * domega;
+
+    if (EFLAG) eimproper = a*domega;
+
+    a = -a * 2.0/s;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*ss1*s1;
+    a22 = -ss2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*ss3*s2;
+    a12 = -r1*r2*(c1*c*s1 + c2*s12);
+    a13 = -r1*r3*s12;
+    a23 = r2*r3*(c2*c*s2 + c1*s12);
+
+    sx2  = a22*vb2x + a23*vb3x + a12*vb1x;
+    sy2  = a22*vb2y + a23*vb3y + a12*vb1y;
+    sz2  = a22*vb2z + a23*vb3z + a12*vb1z;
+
+    f1[0] = a12*vb2x + a13*vb3x + a11*vb1x;
+    f1[1] = a12*vb2y + a13*vb3y + a11*vb1y;
+    f1[2] = a12*vb2z + a13*vb3z + a11*vb1z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a23*vb2x + a33*vb3x + a13*vb1x;
+    f4[1] = a23*vb2y + a33*vb3y + a13*vb1y;
+    f4[2] = a23*vb2z + a33*vb3z + a13*vb1z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
diff --git a/src/USER-OMP/improper_harmonic_omp.h b/src/USER-OMP/improper_harmonic_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2a6a0a03387c59026ff13c7839e88036a061274
--- /dev/null
+++ b/src/USER-OMP/improper_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(harmonic/omp,ImproperHarmonicOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_HARMONIC_OMP_H
+#define LMP_IMPROPER_HARMONIC_OMP_H
+
+#include "improper_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperHarmonicOMP : public ImproperHarmonic, public ThrOMP {
+
+ public:
+    ImproperHarmonicOMP(class LAMMPS *lmp) : 
+      ImproperHarmonic(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1fb1d86be9c1052d16cb0660001cc1f0002bab82
--- /dev/null
+++ b/src/USER-OMP/improper_umbrella_omp.cpp
@@ -0,0 +1,252 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_umbrella_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperUmbrellaOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,n,type;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z;
+  double domega,c,a,s,projhfg,dhax,dhay,dhaz,dahx,dahy,dahz,cotphi;
+  double ax,ay,az,ra2,rh2,ra,rh,rar,rhr,arx,ary,arz,hrx,hry,hrz;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i2][0] - x[i1][0];
+    vb1y = x[i2][1] - x[i1][1];
+    vb1z = x[i2][2] - x[i1][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i1][0];
+    vb2y = x[i3][1] - x[i1][1];
+    vb2z = x[i3][2] - x[i1][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i1][0];
+    vb3y = x[i4][1] - x[i1][1];
+    vb3z = x[i4][2] - x[i1][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+    // A = vb1 X vb2 is perpendicular to IJK plane
+
+    ax = vb1y*vb2z-vb1z*vb2y;
+    ay = vb1z*vb2x-vb1x*vb2z;
+    az = vb1x*vb2y-vb1y*vb2x;
+    ra2 = ax*ax+ay*ay+az*az;
+    rh2 = vb3x*vb3x+vb3y*vb3y+vb3z*vb3z;
+    ra = sqrt(ra2);
+    rh = sqrt(rh2);
+    if (ra < SMALL) ra = SMALL;
+    if (rh < SMALL) rh = SMALL;
+
+    rar = 1/ra;
+    rhr = 1/rh;
+    arx = ax*rar;
+    ary = ay*rar;
+    arz = az*rar;
+    hrx = vb3x*rhr;
+    hry = vb3y*rhr;
+    hrz = vb3z*rhr;
+
+    c = arx*hrx+ary*hry+arz*hrz;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) s = 1.0;
+    if (c < -1.0) s = -1.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    cotphi = c/s;
+
+    projhfg = (vb3x*vb1x+vb3y*vb1y+vb3z*vb1z) / 
+      sqrt(vb1x*vb1x+vb1y*vb1y+vb1z*vb1z); 
+    projhfg += (vb3x*vb2x+vb3y*vb2y+vb3z*vb2z) / 
+      sqrt(vb2x*vb2x+vb2y*vb2y+vb2z*vb2z);
+    if (projhfg > 0.0) {
+      s *= -1.0;
+      cotphi *= -1.0;
+    }
+	
+    //  force and energy
+    // if w0 = 0: E = k * (1 - cos w)
+    // if w0 != 0: E = 0.5 * C (cos w - cos w0)^2, C = k/(sin(w0)^2
+
+    if (w0[type] == 0.0) {
+      if (EFLAG) eimproper = kw[type] * (1.0-s);
+      a = -kw[type];
+    } else {
+      domega = s - cos(w0[type]);
+      a = 0.5 * C[type] * domega;
+      if (EFLAG) eimproper = a * domega;
+      a *= 2.0;
+    }
+
+    // dhax = diffrence between H and A in X direction, etc
+
+    a = a*cotphi;
+    dhax = hrx-c*arx;
+    dhay = hry-c*ary;
+    dhaz = hrz-c*arz;
+
+    dahx = arx-c*hrx;
+    dahy = ary-c*hry;
+    dahz = arz-c*hrz;
+
+    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
+    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
+    f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+
+    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
+    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
+    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+
+    f4[0] = dahx*rhr;
+    f4[1] = dahy*rhr;
+    f4[2] = dahz*rhr;
+
+    f1[0] = -(f2[0] + f3[0] + f4[0]);
+    f1[1] = -(f2[1] + f3[1] + f4[1]);
+    f1[2] = -(f2[2] + f3[2] + f4[2]);
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0]*a;
+      f[i1][1] += f1[1]*a;
+      f[i1][2] += f1[2]*a;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f3[0]*a;
+      f[i2][1] += f3[1]*a;
+      f[i2][2] += f3[2]*a;
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f2[0]*a;
+      f[i3][1] += f2[1]*a;
+      f[i3][2] += f2[2]*a;
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0]*a;
+      f[i4][1] += f4[1]*a;
+      f[i4][2] += f4[2]*a;
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
diff --git a/src/USER-OMP/improper_umbrella_omp.h b/src/USER-OMP/improper_umbrella_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..757089212b1f31c9bc1a77ea839cc0a51e31764d
--- /dev/null
+++ b/src/USER-OMP/improper_umbrella_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(umbrella/omp,ImproperUmbrellaOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_UMBRELLA_OMP_H
+#define LMP_IMPROPER_UMBRELLA_OMP_H
+
+#include "improper_umbrella.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperUmbrellaOMP : public ImproperUmbrella, public ThrOMP {
+
+ public:
+    ImproperUmbrellaOMP(class LAMMPS *lmp) : 
+      ImproperUmbrella(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb8b38a5b1f5a7845ea0e9a248cf3264815d006c
--- /dev/null
+++ b/src/USER-OMP/pair_airebo_omp.cpp
@@ -0,0 +1,2768 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_airebo_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+#define TOL 1.0e-9
+
+/* ---------------------------------------------------------------------- */
+
+PairAIREBOOMP::PairAIREBOOMP(LAMMPS *lmp) :
+  PairAIREBO(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairAIREBOOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  REBO_neigh_thr();
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    FREBO_thr(ifrom,ito,evflag,eflag,vflag_atom,thr);
+    if (ljflag) FLJ_thr(ifrom,ito,evflag,eflag,vflag_atom,thr);
+    if (torflag) TORSION_thr(ifrom,ito,evflag,eflag,thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   Bij function
+------------------------------------------------------------------------- */
+
+double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag,
+				    double VA, int vflag_atom, ThrData * const thr)
+{
+  int atomi,atomj,k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4;
+  int itype,jtype,ktype,ltype,ntype;
+  double rik[3],rjl[3],rkn[3],rji[3],rki[3],rlj[3],rknmag,dNki,dwjl,bij;
+  double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl;
+  double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3;
+  double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ,Nki,Nlj,dS;
+  double lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC;
+  double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
+  double dN2[2],dN3[3];
+  double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3];
+  double Tij;
+  double r32[3],r32mag,cos321,r43[3],r13[3];
+  double dNlj;
+  double om1234,rln[3];
+  double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag;
+  double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
+  double cross321[3],cross234[3],prefactor,SpN;
+  double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc;
+  double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
+  double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
+  double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i;
+  double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij;
+  double F23[3],F12[3],F34[3],F31[3],F24[3],fi[3],fj[3],fk[3],fl[3];
+  double f1[3],f2[3],f3[3],f4[4];
+  double dcut321,PijS,PjiS;
+  double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp;
+  int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int *type = atom->type;
+
+  atomi = i;
+  atomj = j;
+  itype = map[type[i]];
+  jtype = map[type[j]];
+  wij = Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+  NijC = nC[i]-(wij*kronecker(jtype,0));
+  NijH = nH[i]-(wij*kronecker(jtype,1));
+  NjiC = nC[j]-(wij*kronecker(itype,0));
+  NjiH = nH[j]-(wij*kronecker(itype,1));
+  bij = 0.0;
+  tmp = 0.0;
+  tmp2 = 0.0;
+  tmp3 = 0.0;
+  dgdc = 0.0;
+  dgdN = 0.0;
+  NconjtmpI = 0.0;
+  NconjtmpJ = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      lamdajik = 4.0*kronecker(itype,1) *
+	((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
+      Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
+	(wik*kronecker(itype,1));
+      cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
+	(rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      // evaluate splines g and derivatives dg
+
+      g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
+      Etmp = Etmp+(wik*g*exp(lamdajik));
+      tmp3 = tmp3+(wik*dgdN*exp(lamdajik));
+      NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS));
+    }
+  }
+
+  PijS = 0.0;
+  dN2[0] = 0.0;
+  dN2[1] = 0.0;
+  PijS = PijSpline(NijC,NijH,itype,jtype,dN2);
+  pij = 1.0/sqrt(1.0+Etmp+PijS);
+  tmp = -0.5*pij*pij*pij;
+
+  const double invrijm = 1.0/rijmag;
+  const double invrijm2 = invrijm*invrijm;
+
+  // pij forces
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      lamdajik = 4.0*kronecker(itype,1) *
+	((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+
+      const double invrikm = 1.0/rikmag;
+      const double invrijkm = invrijm*invrikm;
+      const double invrikm2 = invrikm*invrikm;
+
+      cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) * invrijkm;
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) -
+	(cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2)));
+      dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) -
+	(cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2)));
+      dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) -
+	(cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2)));
+      dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2));
+      dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2));
+      dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2));
+      dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2));
+      dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2));
+      dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2));
+
+      g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
+      tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik));
+      fj[0] = -tmp2*dcosjikdrj[0];
+      fj[1] = -tmp2*dcosjikdrj[1];
+      fj[2] = -tmp2*dcosjikdrj[2];
+      fi[0] = -tmp2*dcosjikdri[0];
+      fi[1] = -tmp2*dcosjikdri[1];
+      fi[2] = -tmp2*dcosjikdri[2];
+      fk[0] = -tmp2*dcosjikdrk[0];
+      fk[1] = -tmp2*dcosjikdrk[1];
+      fk[2] = -tmp2*dcosjikdrk[2];
+
+      tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1));
+      fj[0] -= tmp2*(-rij[0]*invrijm);
+      fj[1] -= tmp2*(-rij[1]*invrijm);
+      fj[2] -= tmp2*(-rij[2]*invrijm);
+      fi[0] -= tmp2*((-rik[0]*invrikm)+(rij[0]*invrijm));
+      fi[1] -= tmp2*((-rik[1]*invrikm)+(rij[1]*invrijm));
+      fi[2] -= tmp2*((-rik[2]*invrikm)+(rij[2]*invrijm));
+      fk[0] -= tmp2*(rik[0]*invrikm);
+      fk[1] -= tmp2*(rik[1]*invrikm);
+      fk[2] -= tmp2*(rik[2]*invrikm);
+
+      // coordination forces
+
+      // dwik forces
+
+      tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))*invrikm;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // PIJ forces
+
+      tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)*invrikm;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // dgdN forces
+
+      tmp2 = VA*.5*(tmp*tmp3*dwik)*invrikm;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
+
+      if (vflag_atom) {
+	rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
+	rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
+	v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr);
+      }
+    }
+  }
+
+  tmp = 0.0;
+  tmp2 = 0.0;
+  tmp3 = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      lamdaijl = 4.0*kronecker(jtype,1) *
+	((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
+      Nlj = nC[atoml]-(wjl*kronecker(jtype,0)) +
+	nH[atoml]-(wjl*kronecker(jtype,1));
+      cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
+	(rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      // evaluate splines g and derivatives dg
+
+      g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
+      Etmp = Etmp+(wjl*g*exp(lamdaijl));
+      tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl));
+      NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS));
+    }
+  }
+
+  PjiS = 0.0;
+  dN2[0] = 0.0;
+  dN2[1] = 0.0;
+  PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2);
+  pji = 1.0/sqrt(1.0+Etmp+PjiS);
+  tmp = -0.5*pji*pji*pji;
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      lamdaijl = 4.0*kronecker(jtype,1) *
+	((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+
+      const double invrjlm = 1.0/rjlmag;
+      const double invrijlm = invrijm*invrjlm;
+      const double invrjlm2 = invrjlm*invrjlm;
+
+      cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])))
+	* invrijlm;
+
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2);
+      dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2);
+      dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2);
+      dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) +
+	(cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2)));
+      dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) +
+	(cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2)));
+      dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) +
+	(cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2)));
+      dcosijldrl[0] = (rij[0]*invrijlm)+(cosijl*rjl[0]*invrjlm2);
+      dcosijldrl[1] = (rij[1]*invrijlm)+(cosijl*rjl[1]*invrjlm2);
+      dcosijldrl[2] = (rij[2]*invrijlm)+(cosijl*rjl[2]*invrjlm2);
+
+      // evaluate splines g and derivatives dg
+
+      g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
+      tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl));
+      fi[0] = -tmp2*dcosijldri[0];
+      fi[1] = -tmp2*dcosijldri[1];
+      fi[2] = -tmp2*dcosijldri[2];
+      fj[0] = -tmp2*dcosijldrj[0];
+      fj[1] = -tmp2*dcosijldrj[1];
+      fj[2] = -tmp2*dcosijldrj[2];
+      fl[0] = -tmp2*dcosijldrl[0];
+      fl[1] = -tmp2*dcosijldrl[1];
+      fl[2] = -tmp2*dcosijldrl[2];
+
+      tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1));
+      fi[0] -= tmp2*(rij[0]*invrijm);
+      fi[1] -= tmp2*(rij[1]*invrijm);
+      fi[2] -= tmp2*(rij[2]*invrijm);
+      fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm));
+      fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm));
+      fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm));
+      fl[0] -= tmp2*(rjl[0]*invrjlm);
+      fl[1] -= tmp2*(rjl[1]*invrjlm);
+      fl[2] -= tmp2*(rjl[2]*invrjlm);
+
+      // coordination forces
+
+      // dwik forces
+
+      tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // PIJ forces
+
+      tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // dgdN forces
+
+      tmp2 = VA*.5*(tmp*tmp3*dwjl)*invrjlm;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
+
+      if (vflag_atom) {
+	rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
+	v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr);
+      }
+    }
+  }
+
+  // evaluate Nij conj
+
+  Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ);
+  piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3);
+
+  // piRC forces
+
+  REBO_neighs_i = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs_i[k];
+    if (atomk !=atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+      Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
+	(wik*kronecker(itype,1));
+      SpN = Sp(Nki,Nmin,Nmax,dNki);
+
+      tmp2 = VA*dN3[0]*dwik/rikmag;
+      f[atomi][0] -= tmp2*rik[0];
+      f[atomi][1] -= tmp2*rik[1];
+      f[atomi][2] -= tmp2*rik[2];
+      f[atomk][0] += tmp2*rik[0];
+      f[atomk][1] += tmp2*rik[1];
+      f[atomk][2] += tmp2*rik[2];
+
+      if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+      tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag;
+      f[atomi][0] -= tmp2*rik[0];
+      f[atomi][1] -= tmp2*rik[1];
+      f[atomi][2] -= tmp2*rik[2];
+      f[atomk][0] += tmp2*rik[0];
+      f[atomk][1] += tmp2*rik[1];
+      f[atomk][2] += tmp2*rik[2];
+
+      if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+      if (fabs(dNki) > TOL) {
+	REBO_neighs_k = REBO_firstneigh[atomk];
+	for (n = 0; n < REBO_numneigh[atomk]; n++) {
+	  atomn = REBO_neighs_k[n];
+	  if (atomn != atomi) {
+	    ntype = map[type[atomn]];
+	    rkn[0] = x[atomk][0]-x[atomn][0];
+	    rkn[1] = x[atomk][1]-x[atomn][1];
+	    rkn[2] = x[atomk][2]-x[atomn][2];
+	    rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
+	    Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
+
+	    tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag;
+	    f[atomk][0] -= tmp2*rkn[0];
+	    f[atomk][1] -= tmp2*rkn[1];
+	    f[atomk][2] -= tmp2*rkn[2];
+	    f[atomn][0] += tmp2*rkn[0];
+	    f[atomn][1] += tmp2*rkn[1];
+	    f[atomn][2] += tmp2*rkn[2];
+
+	    if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr);
+	  }
+	}
+      }
+    }
+  }
+
+  // piRC forces
+
+  REBO_neighs = REBO_firstneigh[atomj];
+  for (l = 0; l < REBO_numneigh[atomj]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml !=atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+      Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
+	(wjl*kronecker(jtype,1));
+      SpN = Sp(Nlj,Nmin,Nmax,dNlj);
+
+      tmp2 = VA*dN3[1]*dwjl/rjlmag;
+      f[atomj][0] -= tmp2*rjl[0];
+      f[atomj][1] -= tmp2*rjl[1];
+      f[atomj][2] -= tmp2*rjl[2];
+      f[atoml][0] += tmp2*rjl[0];
+      f[atoml][1] += tmp2*rjl[1];
+      f[atoml][2] += tmp2*rjl[2];
+
+      if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+      tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag;
+      f[atomj][0] -= tmp2*rjl[0];
+      f[atomj][1] -= tmp2*rjl[1];
+      f[atomj][2] -= tmp2*rjl[2];
+      f[atoml][0] += tmp2*rjl[0];
+      f[atoml][1] += tmp2*rjl[1];
+      f[atoml][2] += tmp2*rjl[2];
+
+      if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+      if (fabs(dNlj) > TOL) {
+	REBO_neighs_l = REBO_firstneigh[atoml];
+	for (n = 0; n < REBO_numneigh[atoml]; n++) {
+	  atomn = REBO_neighs_l[n];
+	  if (atomn != atomj) {
+	    ntype = map[type[atomn]];
+	    rln[0] = x[atoml][0]-x[atomn][0];
+	    rln[1] = x[atoml][1]-x[atomn][1];
+	    rln[2] = x[atoml][2]-x[atomn][2];
+	    rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
+	    Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
+
+	    tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag;
+	    f[atoml][0] -= tmp2*rln[0];
+	    f[atoml][1] -= tmp2*rln[1];
+	    f[atoml][2] -= tmp2*rln[2];
+	    f[atomn][0] += tmp2*rln[0];
+	    f[atomn][1] += tmp2*rln[1];
+	    f[atomn][2] += tmp2*rln[2];
+
+	    if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr);
+	  }
+	}
+      }
+    }
+  }
+
+  Tij = 0.0;
+  dN3[0] = 0.0;
+  dN3[1] = 0.0;
+  dN3[2] = 0.0;
+  if (itype == 0 && jtype == 0)
+    Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3);
+  Etmp = 0.0;
+
+  if (fabs(Tij) > TOL) {
+    atom2 = atomi;
+    atom3 = atomj;
+    r32[0] = x[atom3][0]-x[atom2][0];
+    r32[1] = x[atom3][1]-x[atom2][1];
+    r32[2] = x[atom3][2]-x[atom2][2];
+    r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2]));
+    r23[0] = -r32[0];
+    r23[1] = -r32[1];
+    r23[2] = -r32[2];
+    r23mag = r32mag;
+    REBO_neighs_i = REBO_firstneigh[i];
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      atomk = REBO_neighs_i[k];
+      atom1 = atomk;
+      ktype = map[type[atomk]];
+      if (atomk != atomj) {
+	r21[0] = x[atom2][0]-x[atom1][0];
+	r21[1] = x[atom2][1]-x[atom1][1];
+	r21[2] = x[atom2][2]-x[atom1][2];
+	r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]);
+	cos321 = -1.0*((r21[0]*r32[0])+(r21[1]*r32[1])+(r21[2]*r32[2])) /
+	  (r21mag*r32mag);
+	cos321 = MIN(cos321,1.0);
+	cos321 = MAX(cos321,-1.0);
+	Sp2(cos321,thmin,thmax,dcut321);
+	sin321 = sqrt(1.0 - cos321*cos321);
+	sink2i = 1.0/(sin321*sin321);
+	rik2i = 1.0/(r21mag*r21mag);
+	if (sin321 != 0.0) {
+	  rr = (r23mag*r23mag)-(r21mag*r21mag);
+	  rjk[0] = r21[0]-r23[0];
+	  rjk[1] = r21[1]-r23[1];
+	  rjk[2] = r21[2]-r23[2];
+	  rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
+	  rijrik = 2.0*r23mag*r21mag;
+	  rik2 = r21mag*r21mag;
+	  dctik = (-rr+rjk2)/(rijrik*rik2);
+	  dctij = (rr+rjk2)/(rijrik*r23mag*r23mag);
+	  dctjk = -2.0/rijrik;
+	  w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21);
+	  rijmag = r32mag;
+	  rikmag = r21mag;
+	  rij2 = r32mag*r32mag;
+	  rik2 = r21mag*r21mag;
+	  costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
+	  tspjik = Sp2(costmp,thmin,thmax,dtsjik);
+	  dtsjik = -dtsjik;
+
+	  REBO_neighs_j = REBO_firstneigh[j];
+	  for (l = 0; l < REBO_numneigh[j]; l++) {
+	    atoml = REBO_neighs_j[l];
+	    atom4 = atoml;
+	    ltype = map[type[atoml]];
+	    if (!(atoml == atomi || atoml == atomk)) {
+	      r34[0] = x[atom3][0]-x[atom4][0];
+	      r34[1] = x[atom3][1]-x[atom4][1];
+	      r34[2] = x[atom3][2]-x[atom4][2];
+	      r34mag = sqrt((r34[0]*r34[0])+(r34[1]*r34[1])+(r34[2]*r34[2]));
+	      cos234 = (r32[0]*r34[0] + r32[1]*r34[1] + r32[2]*r34[2]) /
+		(r32mag*r34mag);
+	      cos234 = MIN(cos234,1.0);
+	      cos234 = MAX(cos234,-1.0);
+	      sin234 = sqrt(1.0 - cos234*cos234);
+	      sinl2i = 1.0/(sin234*sin234);
+	      rjl2i = 1.0/(r34mag*r34mag);
+
+	      if (sin234 != 0.0) {
+		w34 = Sp(r34mag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dw34);
+		rr = (r23mag*r23mag)-(r34mag*r34mag);
+		ril[0] = r23[0]+r34[0];
+		ril[1] = r23[1]+r34[1];
+		ril[2] = r23[2]+r34[2];
+		ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
+		rijrjl = 2.0*r23mag*r34mag;
+		rjl2 = r34mag*r34mag;
+		dctjl = (-rr+ril2)/(rijrjl*rjl2);
+		dctji = (rr+ril2)/(rijrjl*r23mag*r23mag);
+		dctil = -2.0/rijrjl;
+		rjlmag = r34mag;
+		rjl2 = r34mag*r34mag;
+		costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
+		tspijl = Sp2(costmp,thmin,thmax,dtsijl);
+		dtsijl = -dtsijl;
+		prefactor = VA*Tij;
+
+		cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]);
+		cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]);
+		cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]);
+		cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]);
+		cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]);
+		cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]);
+
+		cwnum = (cross321[0]*cross234[0]) +
+		  (cross321[1]*cross234[1]) + (cross321[2]*cross234[2]);
+		cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234;
+		om1234 = cwnum/cwnom;
+		cw = om1234;
+		Etmp += ((1.0-(om1234*om1234))*w21*w34) *
+		  (1.0-tspjik)*(1.0-tspijl);
+
+		dt1dik = (rik2i)-(dctik*sink2i*cos321);
+		dt1djk = (-dctjk*sink2i*cos321);
+		dt1djl = (rjl2i)-(dctjl*sinl2i*cos234);
+		dt1dil = (-dctil*sinl2i*cos234);
+		dt1dij = (2.0/(r23mag*r23mag))-(dctij*sink2i*cos321) -
+		  (dctji*sinl2i*cos234);
+
+		dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]);
+		dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]);
+		dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]);
+
+		dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]);
+		dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]);
+		dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]);
+
+		dt2dij[0] = (r21[2]*cross234[1])-(r34[2]*cross321[1]) -
+		  (r21[1]*cross234[2])+(r34[1]*cross321[2]);
+		dt2dij[1] = (r21[0]*cross234[2])-(r34[0]*cross321[2]) -
+		  (r21[2]*cross234[0])+(r34[2]*cross321[0]);
+		dt2dij[2] = (r21[1]*cross234[0])-(r34[1]*cross321[0]) -
+		  (r21[0]*cross234[1])+(r34[0]*cross321[1]);
+
+		aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
+		  (1.0-tspjik)*(1.0-tspijl);
+		aaa1 = -prefactor*(1.0-(om1234*om1234)) *
+		  (1.0-tspjik)*(1.0-tspijl);
+		aaa2 = aaa1*w21*w34;
+		at2 = aa*cwnum;
+
+		fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) +
+		  (aaa2*dtsijl*dctji*(1.0-tspjik));
+		fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl));
+		fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik));
+		fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl));
+		fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik));
+
+		F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]);
+		F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]);
+		F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]);
+
+		F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]);
+		F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]);
+		F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]);
+
+		F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]);
+		F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]);
+		F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]);
+
+		F31[0] = (fcjkpc*rjk[0]);
+		F31[1] = (fcjkpc*rjk[1]);
+		F31[2] = (fcjkpc*rjk[2]);
+
+		F24[0] = (fcilpc*ril[0]);
+		F24[1] = (fcilpc*ril[1]);
+		F24[2] = (fcilpc*ril[2]);
+
+		f1[0] = -F12[0]-F31[0];
+		f1[1] = -F12[1]-F31[1];
+		f1[2] = -F12[2]-F31[2];
+		f2[0] = F23[0]+F12[0]+F24[0];
+		f2[1] = F23[1]+F12[1]+F24[1];
+		f2[2] = F23[2]+F12[2]+F24[2];
+		f3[0] = -F23[0]+F34[0]+F31[0];
+		f3[1] = -F23[1]+F34[1]+F31[1];
+		f3[2] = -F23[2]+F34[2]+F31[2];
+		f4[0] = -F34[0]-F24[0];
+		f4[1] = -F34[1]-F24[1];
+		f4[2] = -F34[2]-F24[2];
+
+		// coordination forces
+
+		tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
+		  (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag;
+		f2[0] -= tmp2*r21[0];
+		f2[1] -= tmp2*r21[1];
+		f2[2] -= tmp2*r21[2];
+		f1[0] += tmp2*r21[0];
+		f1[1] += tmp2*r21[1];
+		f1[2] += tmp2*r21[2];
+
+		tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
+		  (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag;
+		f3[0] -= tmp2*r34[0];
+		f3[1] -= tmp2*r34[1];
+		f3[2] -= tmp2*r34[2];
+		f4[0] += tmp2*r34[0];
+		f4[1] += tmp2*r34[1];
+		f4[2] += tmp2*r34[2];
+
+		f[atom1][0] += f1[0]; f[atom1][1] += f1[1];
+		f[atom1][2] += f1[2];
+		f[atom2][0] += f2[0]; f[atom2][1] += f2[1];
+		f[atom2][2] += f2[2];
+		f[atom3][0] += f3[0]; f[atom3][1] += f3[1];
+		f[atom3][2] += f3[2];
+		f[atom4][0] += f4[0]; f[atom4][1] += f4[1];
+		f[atom4][2] += f4[2];
+
+		if (vflag_atom) {
+		  r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2];
+		  r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2];
+		  v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr);
+		}
+	      }
+	    }
+	  }
+	}
+      }
+    }
+
+    // Tij forces now that we have Etmp
+
+    REBO_neighs = REBO_firstneigh[i];
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      atomk = REBO_neighs[k];
+      if (atomk != atomj) {
+	ktype = map[type[atomk]];
+	rik[0] = x[atomi][0]-x[atomk][0];
+	rik[1] = x[atomi][1]-x[atomk][1];
+	rik[2] = x[atomi][2]-x[atomk][2];
+	rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+	wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+	Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
+	  (wik*kronecker(itype,1));
+	SpN = Sp(Nki,Nmin,Nmax,dNki);
+
+	tmp2 = VA*dN3[0]*dwik*Etmp/rikmag;
+	f[atomi][0] -= tmp2*rik[0];
+	f[atomi][1] -= tmp2*rik[1];
+	f[atomi][2] -= tmp2*rik[2];
+	f[atomk][0] += tmp2*rik[0];
+	f[atomk][1] += tmp2*rik[1];
+	f[atomk][2] += tmp2*rik[2];
+
+	if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag;
+	f[atomi][0] -= tmp2*rik[0];
+	f[atomi][1] -= tmp2*rik[1];
+	f[atomi][2] -= tmp2*rik[2];
+	f[atomk][0] += tmp2*rik[0];
+	f[atomk][1] += tmp2*rik[1];
+	f[atomk][2] += tmp2*rik[2];
+
+	if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	if (fabs(dNki) > TOL) {
+	  REBO_neighs_k = REBO_firstneigh[atomk];
+	  for (n = 0; n < REBO_numneigh[atomk]; n++) {
+	    atomn = REBO_neighs_k[n];
+	    ntype = map[type[atomn]];
+	    if (atomn != atomi) {
+	      rkn[0] = x[atomk][0]-x[atomn][0];
+	      rkn[1] = x[atomk][1]-x[atomn][1];
+	      rkn[2] = x[atomk][2]-x[atomn][2];
+	      rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
+	      Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
+
+	      tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag;
+	      f[atomk][0] -= tmp2*rkn[0];
+	      f[atomk][1] -= tmp2*rkn[1];
+	      f[atomk][2] -= tmp2*rkn[2];
+	      f[atomn][0] += tmp2*rkn[0];
+	      f[atomn][1] += tmp2*rkn[1];
+	      f[atomn][2] += tmp2*rkn[2];
+
+	      if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr);
+	    }
+	  }
+	}
+      }
+    }
+
+    // Tij forces
+
+    REBO_neighs = REBO_firstneigh[j];
+    for (l = 0; l < REBO_numneigh[j]; l++) {
+      atoml = REBO_neighs[l];
+      if (atoml != atomi) {
+	ltype = map[type[atoml]];
+	rjl[0] = x[atomj][0]-x[atoml][0];
+	rjl[1] = x[atomj][1]-x[atoml][1];
+	rjl[2] = x[atomj][2]-x[atoml][2];
+	rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+	wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+	Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
+	  (wjl*kronecker(jtype,1));
+	SpN = Sp(Nlj,Nmin,Nmax,dNlj);
+
+	tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag;
+	f[atomj][0] -= tmp2*rjl[0];
+	f[atomj][1] -= tmp2*rjl[1];
+	f[atomj][2] -= tmp2*rjl[2];
+	f[atoml][0] += tmp2*rjl[0];
+	f[atoml][1] += tmp2*rjl[1];
+	f[atoml][2] += tmp2*rjl[2];
+
+	if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag;
+	f[atomj][0] -= tmp2*rjl[0];
+	f[atomj][1] -= tmp2*rjl[1];
+	f[atomj][2] -= tmp2*rjl[2];
+	f[atoml][0] += tmp2*rjl[0];
+	f[atoml][1] += tmp2*rjl[1];
+	f[atoml][2] += tmp2*rjl[2];
+
+	if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	if (fabs(dNlj) > TOL) {
+	  REBO_neighs_l = REBO_firstneigh[atoml];
+	  for (n = 0; n < REBO_numneigh[atoml]; n++) {
+	    atomn = REBO_neighs_l[n];
+	    ntype = map[type[atomn]];
+	    if (atomn !=atomj) {
+	      rln[0] = x[atoml][0]-x[atomn][0];
+	      rln[1] = x[atoml][1]-x[atomn][1];
+	      rln[2] = x[atoml][2]-x[atomn][2];
+	      rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
+	      Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
+
+	      tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag;
+	      f[atoml][0] -= tmp2*rln[0];
+	      f[atoml][1] -= tmp2*rln[1];
+	      f[atoml][2] -= tmp2*rln[2];
+	      f[atomn][0] += tmp2*rln[0];
+	      f[atomn][1] += tmp2*rln[1];
+	      f[atomn][2] += tmp2*rln[2];
+
+	      if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr);
+	    }
+	  }
+	}
+      }
+    }
+  }
+
+  bij = (0.5*(pij+pji))+piRC+(Tij*Etmp);
+  return bij;
+}
+
+/* ----------------------------------------------------------------------
+   Bij* function
+------------------------------------------------------------------------- */
+
+double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag,
+				      double VA, double rij0[3], double rij0mag,
+				      int vflag_atom, ThrData * const thr)
+{
+  int k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4;
+  int atomi,atomj,itype,jtype,ktype,ltype,ntype;
+  double rik[3], rjl[3], rkn[3],rknmag,dNki;
+  double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl;
+  double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3;
+  double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ;
+  double Nki,Nlj,dS,lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC;
+  double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
+  double dN2[2],dN3[3];
+  double dcosijldrj[3],dcosijldrl[3],dcosjikdrj[3],dwjl;
+  double Tij,crosskij[3],crosskijmag;
+  double crossijl[3],crossijlmag,omkijl;
+  double tmppij,tmppji,dN2PIJ[2],dN2PJI[2],dN3piRC[3],dN3Tij[3];
+  double bij,tmp3pij,tmp3pji,Stb,dStb;
+  double r32[3],r32mag,cos321;
+  double om1234,rln[3];
+  double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag;
+  double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
+  double cross321[3],cross234[3],prefactor,SpN;
+  double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc;
+  double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
+  double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
+  double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i;
+  double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij;
+  double dNlj;
+  double PijS,PjiS;
+  double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp;
+  int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l;
+  double F12[3],F23[3],F34[3],F31[3],F24[3];
+  double fi[3],fj[3],fk[3],fl[3],f1[3],f2[3],f3[3],f4[4];
+  double rji[3],rki[3],rlj[3],r13[3],r43[3];
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+
+  atomi = i;
+  atomj = j;
+  itype = map[type[atomi]];
+  jtype = map[type[atomj]];
+  wij = Sp(rij0mag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+  NijC = nC[atomi]-(wij*kronecker(jtype,0));
+  NijH = nH[atomi]-(wij*kronecker(jtype,1));
+  NjiC = nC[atomj]-(wij*kronecker(itype,0));
+  NjiH = nH[atomj]-(wij*kronecker(itype,1));
+
+  bij = 0.0;
+  tmp = 0.0;
+  tmp2 = 0.0;
+  tmp3 = 0.0;
+  dgdc = 0.0;
+  dgdN = 0.0;
+  NconjtmpI = 0.0;
+  NconjtmpJ = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      lamdajik = 4.0*kronecker(itype,1) *
+	((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
+      Nki = nC[atomk]-(wik*kronecker(itype,0)) +
+	nH[atomk]-(wik*kronecker(itype,1));
+      cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
+	(rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      // evaluate splines g and derivatives dg
+
+      g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
+      Etmp = Etmp+(wik*g*exp(lamdajik));
+      tmp3 = tmp3+(wik*dgdN*exp(lamdajik));
+      NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS));
+    }
+  }
+
+  PijS = 0.0;
+  dN2PIJ[0] = 0.0;
+  dN2PIJ[1] = 0.0;
+  PijS = PijSpline(NijC,NijH,itype,jtype,dN2PIJ);
+  pij = 1.0/sqrt(1.0+Etmp+PijS);
+  tmppij = -.5*pij*pij*pij;
+  tmp3pij = tmp3;
+  tmp = 0.0;
+  tmp2 = 0.0;
+  tmp3 = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      lamdaijl = 4.0*kronecker(jtype,1) *
+	((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
+      Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
+	(wjl*kronecker(jtype,1));
+      cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
+	(rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      // evaluate splines g and derivatives dg
+
+      g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
+      Etmp = Etmp+(wjl*g*exp(lamdaijl));
+      tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl));
+      NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS));
+    }
+  }
+
+  PjiS = 0.0;
+  dN2PJI[0] = 0.0;
+  dN2PJI[1] = 0.0;
+  PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2PJI);
+  pji = 1.0/sqrt(1.0+Etmp+PjiS);
+  tmppji = -.5*pji*pji*pji;
+  tmp3pji = tmp3;
+
+  // evaluate Nij conj
+
+  Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ);
+  piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3piRC);
+  Tij = 0.0;
+  dN3Tij[0] = 0.0;
+  dN3Tij[1] = 0.0;
+  dN3Tij[2] = 0.0;
+  if (itype == 0 && jtype == 0)
+    Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3Tij);
+
+  Etmp = 0.0;
+  if (fabs(Tij) > TOL) {
+    REBO_neighs_i = REBO_firstneigh[i];
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      atomk = REBO_neighs_i[k];
+      ktype = map[type[atomk]];
+      if (atomk != atomj) {
+	rik[0] = x[atomi][0]-x[atomk][0];
+	rik[1] = x[atomi][1]-x[atomk][1];
+	rik[2] = x[atomi][2]-x[atomk][2];
+	rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+	cos321 = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
+	  (rijmag*rikmag);
+	cos321 = MIN(cos321,1.0);
+	cos321 = MAX(cos321,-1.0);
+
+	rjk[0] = rik[0]-rij[0];
+	rjk[1] = rik[1]-rij[1];
+	rjk[2] = rik[2]-rij[2];
+	rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
+	rij2 = rijmag*rijmag;
+	rik2 = rikmag*rikmag;
+	costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
+	tspjik = Sp2(costmp,thmin,thmax,dtsjik);
+
+	if (sqrt(1.0 - cos321*cos321) != 0.0) {
+	  wik = Sp(rikmag,rcmin[itype][ktype],rcmaxp[itype][ktype],dwik);
+	  REBO_neighs_j = REBO_firstneigh[j];
+	  for (l = 0; l < REBO_numneigh[j]; l++) {
+	    atoml = REBO_neighs_j[l];
+	    ltype = map[type[atoml]];
+	    if (!(atoml == atomi || atoml == atomk)) {
+	      rjl[0] = x[atomj][0]-x[atoml][0];
+	      rjl[1] = x[atomj][1]-x[atoml][1];
+	      rjl[2] = x[atomj][2]-x[atoml][2];
+	      rjlmag = sqrt(rjl[0]*rjl[0] + rjl[1]*rjl[1] + rjl[2]*rjl[2]);
+	      cos234 = -((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
+		(rijmag*rjlmag);
+	      cos234 = MIN(cos234,1.0);
+	      cos234 = MAX(cos234,-1.0);
+
+	      ril[0] = rij[0]+rjl[0];
+	      ril[1] = rij[1]+rjl[1];
+	      ril[2] = rij[2]+rjl[2];
+	      ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
+	      rijrjl = 2.0*rijmag*rjlmag;
+	      rjl2 = rjlmag*rjlmag;
+	      costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
+	      tspijl = Sp2(costmp,thmin,thmax,dtsijl);
+
+	      if (sqrt(1.0 - cos234*cos234) != 0.0) {
+		wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dS);
+		crosskij[0] = (rij[1]*rik[2]-rij[2]*rik[1]);
+		crosskij[1] = (rij[2]*rik[0]-rij[0]*rik[2]);
+		crosskij[2] = (rij[0]*rik[1]-rij[1]*rik[0]);
+		crosskijmag = sqrt(crosskij[0]*crosskij[0] +
+				   crosskij[1]*crosskij[1] +
+				   crosskij[2]*crosskij[2]);
+		crossijl[0] = (rij[1]*rjl[2]-rij[2]*rjl[1]);
+		crossijl[1] = (rij[2]*rjl[0]-rij[0]*rjl[2]);
+		crossijl[2] = (rij[0]*rjl[1]-rij[1]*rjl[0]);
+		crossijlmag = sqrt(crossijl[0]*crossijl[0] +
+				   crossijl[1]*crossijl[1] +
+				   crossijl[2]*crossijl[2]);
+		omkijl = -1.0*(((crosskij[0]*crossijl[0]) +
+				(crosskij[1]*crossijl[1]) +
+				(crosskij[2]*crossijl[2])) /
+			       (crosskijmag*crossijlmag));
+		Etmp += ((1.0-(omkijl*omkijl))*wik*wjl) *
+		  (1.0-tspjik)*(1.0-tspijl);
+	      }
+	    }
+	  }
+	}
+      }
+    }
+  }
+
+  bij = (.5*(pij+pji))+piRC+(Tij*Etmp);
+  Stb = Sp2(bij,bLJmin[itype][jtype],bLJmax[itype][jtype],dStb);
+  VA = VA*dStb;
+
+  if (dStb != 0.0) {
+    tmp = tmppij;
+    dN2[0] = dN2PIJ[0];
+    dN2[1] = dN2PIJ[1];
+    tmp3 = tmp3pij;
+
+    const double invrijm = 1.0/rijmag;
+    const double invrijm2 = invrijm*invrijm;
+
+    // pij forces
+
+    REBO_neighs_i = REBO_firstneigh[i];
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      atomk = REBO_neighs_i[k];
+      if (atomk != atomj) {
+	lamdajik = 0.0;
+	rik[0] = x[atomi][0]-x[atomk][0];
+	rik[1] = x[atomi][1]-x[atomk][1];
+	rik[2] = x[atomi][2]-x[atomk][2];
+	rikmag = sqrt(rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]);
+	lamdajik = 4.0*kronecker(itype,1) *
+	  ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
+	wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+
+	const double invrikm = 1.0/rikmag;
+	const double invrijkm = invrijm*invrikm;
+	const double invrikm2 = invrikm*invrikm;
+
+	cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2]))
+	  * invrijkm;
+	cosjik = MIN(cosjik,1.0);
+	cosjik = MAX(cosjik,-1.0);
+
+	dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) -
+	  (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2)));
+	dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) -
+	  (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2)));
+	dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) -
+	  (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2)));
+	dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2));
+	dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2));
+	dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2));
+	dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2));
+	dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2));
+	dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2));
+
+	g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
+
+	tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik));
+	fj[0] = -tmp2*dcosjikdrj[0];
+	fj[1] = -tmp2*dcosjikdrj[1];
+	fj[2] = -tmp2*dcosjikdrj[2];
+	fi[0] = -tmp2*dcosjikdri[0];
+	fi[1] = -tmp2*dcosjikdri[1];
+	fi[2] = -tmp2*dcosjikdri[2];
+	fk[0] = -tmp2*dcosjikdrk[0];
+	fk[1] = -tmp2*dcosjikdrk[1];
+	fk[2] = -tmp2*dcosjikdrk[2];
+
+	tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1));
+	fj[0] -= tmp2*(-rij[0]*invrijm);
+	fj[1] -= tmp2*(-rij[1]*invrijm);
+	fj[2] -= tmp2*(-rij[2]*invrijm);
+	fi[0] -= tmp2*((-rik[0]/rikmag)+(rij[0]*invrijm));
+	fi[1] -= tmp2*((-rik[1]/rikmag)+(rij[1]*invrijm));
+	fi[2] -= tmp2*((-rik[2]/rikmag)+(rij[2]*invrijm));
+	fk[0] -= tmp2*(rik[0]/rikmag);
+	fk[1] -= tmp2*(rik[1]/rikmag);
+	fk[2] -= tmp2*(rik[2]/rikmag);
+
+	// coordination forces
+
+	// dwik forces
+
+	tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))/rikmag;
+	fi[0] -= tmp2*rik[0];
+	fi[1] -= tmp2*rik[1];
+	fi[2] -= tmp2*rik[2];
+	fk[0] += tmp2*rik[0];
+	fk[1] += tmp2*rik[1];
+	fk[2] += tmp2*rik[2];
+
+	// PIJ forces
+
+	tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)/rikmag;
+	fi[0] -= tmp2*rik[0];
+	fi[1] -= tmp2*rik[1];
+	fi[2] -= tmp2*rik[2];
+	fk[0] += tmp2*rik[0];
+	fk[1] += tmp2*rik[1];
+	fk[2] += tmp2*rik[2];
+
+	// dgdN forces
+
+	tmp2 = VA*.5*(tmp*tmp3*dwik)/rikmag;
+	fi[0] -= tmp2*rik[0];
+	fi[1] -= tmp2*rik[1];
+	fi[2] -= tmp2*rik[2];
+	fk[0] += tmp2*rik[0];
+	fk[1] += tmp2*rik[1];
+	fk[2] += tmp2*rik[2];
+
+	f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+	f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+	f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
+
+	if (vflag_atom) {
+	  rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
+	  rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
+	  v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr);
+	}
+      }
+    }
+
+    tmp = tmppji;
+    tmp3 = tmp3pji;
+    dN2[0] = dN2PJI[0];
+    dN2[1] = dN2PJI[1];
+    REBO_neighs  =  REBO_firstneigh[j];
+    for (l = 0; l < REBO_numneigh[j]; l++) {
+      atoml = REBO_neighs[l];
+      if (atoml !=atomi) {
+	ltype = map[type[atoml]];
+	rjl[0] = x[atomj][0]-x[atoml][0];
+	rjl[1] = x[atomj][1]-x[atoml][1];
+	rjl[2] = x[atomj][2]-x[atoml][2];
+	rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+	lamdaijl = 4.0*kronecker(jtype,1) *
+	  ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
+	wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+
+	const double invrjlm = 1.0/rjlmag;
+	const double invrijlm = invrijm*invrjlm;
+	const double invrjlm2 = invrjlm*invrjlm;
+
+	cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) *
+	  invrijlm;
+	cosijl = MIN(cosijl,1.0);
+	cosijl = MAX(cosijl,-1.0);
+
+	dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2);
+	dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2);
+	dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2);
+	dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) +
+	  (cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2)));
+	dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) +
+	  (cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2)));
+	dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) +
+	  (cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2)));
+	dcosijldrl[0] = (rij[0]*invrijlm) + (cosijl*rjl[0]*invrjlm2);
+	dcosijldrl[1] = (rij[1]*invrijlm) + (cosijl*rjl[1]*invrjlm2);
+	dcosijldrl[2] = (rij[2]*invrijlm) + (cosijl*rjl[2]*invrjlm2);
+
+	// evaluate splines g and derivatives dg
+
+	g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
+	tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl));
+	fi[0] = -tmp2*dcosijldri[0];
+	fi[1] = -tmp2*dcosijldri[1];
+	fi[2] = -tmp2*dcosijldri[2];
+	fj[0] = -tmp2*dcosijldrj[0];
+	fj[1] = -tmp2*dcosijldrj[1];
+	fj[2] = -tmp2*dcosijldrj[2];
+	fl[0] = -tmp2*dcosijldrl[0];
+	fl[1] = -tmp2*dcosijldrl[1];
+	fl[2] = -tmp2*dcosijldrl[2];
+
+	tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1));
+	fi[0] -= tmp2*(rij[0]*invrijm);
+	fi[1] -= tmp2*(rij[1]*invrijm);
+	fi[2] -= tmp2*(rij[2]*invrijm);
+	fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm));
+	fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm));
+	fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm));
+	fl[0] -= tmp2*(rjl[0]*invrjlm);
+	fl[1] -= tmp2*(rjl[1]*invrjlm);
+	fl[2] -= tmp2*(rjl[2]*invrjlm);
+
+ 	// coordination forces
+	// dwik forces
+
+	tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm;
+	fj[0] -= tmp2*rjl[0];
+	fj[1] -= tmp2*rjl[1];
+	fj[2] -= tmp2*rjl[2];
+	fl[0] += tmp2*rjl[0];
+	fl[1] += tmp2*rjl[1];
+	fl[2] += tmp2*rjl[2];
+
+	// PIJ forces
+
+	tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm;
+	fj[0] -= tmp2*rjl[0];
+	fj[1] -= tmp2*rjl[1];
+	fj[2] -= tmp2*rjl[2];
+	fl[0] += tmp2*rjl[0];
+	fl[1] += tmp2*rjl[1];
+	fl[2] += tmp2*rjl[2];
+
+	// dgdN forces
+
+	tmp2=VA*.5*(tmp*tmp3*dwjl)*invrjlm;
+	fj[0] -= tmp2*rjl[0];
+	fj[1] -= tmp2*rjl[1];
+	fj[2] -= tmp2*rjl[2];
+	fl[0] += tmp2*rjl[0];
+	fl[1] += tmp2*rjl[1];
+	fl[2] += tmp2*rjl[2];
+
+	f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+	f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+	f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
+
+	if (vflag_atom) {
+	  rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
+	  v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr);
+	}
+      }
+    }
+
+    // piRC forces
+
+    dN3[0] = dN3piRC[0];
+    dN3[1] = dN3piRC[1];
+    dN3[2] = dN3piRC[2];
+
+    REBO_neighs_i = REBO_firstneigh[i];
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      atomk = REBO_neighs_i[k];
+      if (atomk != atomj) {
+	ktype = map[type[atomk]];
+	rik[0] = x[atomi][0]-x[atomk][0];
+	rik[1] = x[atomi][1]-x[atomk][1];
+	rik[2] = x[atomi][2]-x[atomk][2];
+	rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+	wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+	Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
+	  (wik*kronecker(itype,1));
+	SpN = Sp(Nki,Nmin,Nmax,dNki);
+
+	tmp2 = VA*dN3[0]*dwik/rikmag;
+	f[atomi][0] -= tmp2*rik[0];
+	f[atomi][1] -= tmp2*rik[1];
+	f[atomi][2] -= tmp2*rik[2];
+	f[atomk][0] += tmp2*rik[0];
+	f[atomk][1] += tmp2*rik[1];
+	f[atomk][2] += tmp2*rik[2];
+
+	if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag;
+	f[atomi][0] -= tmp2*rik[0];
+	f[atomi][1] -= tmp2*rik[1];
+	f[atomi][2] -= tmp2*rik[2];
+	f[atomk][0] += tmp2*rik[0];
+	f[atomk][1] += tmp2*rik[1];
+	f[atomk][2] += tmp2*rik[2];
+
+	if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	if (fabs(dNki) > TOL) {
+	  REBO_neighs_k = REBO_firstneigh[atomk];
+	  for (n = 0; n < REBO_numneigh[atomk]; n++) {
+	    atomn = REBO_neighs_k[n];
+	    if (atomn != atomi) {
+	      ntype = map[type[atomn]];
+	      rkn[0] = x[atomk][0]-x[atomn][0];
+	      rkn[1] = x[atomk][1]-x[atomn][1];
+	      rkn[2] = x[atomk][2]-x[atomn][2];
+	      rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
+	      Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
+
+	      tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag;
+	      f[atomk][0] -= tmp2*rkn[0];
+	      f[atomk][1] -= tmp2*rkn[1];
+	      f[atomk][2] -= tmp2*rkn[2];
+	      f[atomn][0] += tmp2*rkn[0];
+	      f[atomn][1] += tmp2*rkn[1];
+	      f[atomn][2] += tmp2*rkn[2];
+
+	      if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr);
+	    }
+	  }
+	}
+      }
+    }
+
+    // piRC forces to J side
+
+    REBO_neighs = REBO_firstneigh[j];
+    for (l = 0; l < REBO_numneigh[j]; l++) {
+      atoml = REBO_neighs[l];
+      if (atoml != atomi) {
+	ltype = map[type[atoml]];
+	rjl[0] = x[atomj][0]-x[atoml][0];
+	rjl[1] = x[atomj][1]-x[atoml][1];
+	rjl[2] = x[atomj][2]-x[atoml][2];
+	rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+	wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+	Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
+	  (wjl*kronecker(jtype,1));
+	SpN = Sp(Nlj,Nmin,Nmax,dNlj);
+
+	tmp2 = VA*dN3[1]*dwjl/rjlmag;
+	f[atomj][0] -= tmp2*rjl[0];
+	f[atomj][1] -= tmp2*rjl[1];
+	f[atomj][2] -= tmp2*rjl[2];
+	f[atoml][0] += tmp2*rjl[0];
+	f[atoml][1] += tmp2*rjl[1];
+	f[atoml][2] += tmp2*rjl[2];
+
+	if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag;
+	f[atomj][0] -= tmp2*rjl[0];
+	f[atomj][1] -= tmp2*rjl[1];
+	f[atomj][2] -= tmp2*rjl[2];
+	f[atoml][0] += tmp2*rjl[0];
+	f[atoml][1] += tmp2*rjl[1];
+	f[atoml][2] += tmp2*rjl[2];
+
+	if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	if (fabs(dNlj) > TOL) {
+	  REBO_neighs_l = REBO_firstneigh[atoml];
+	  for (n = 0; n < REBO_numneigh[atoml]; n++) {
+	    atomn = REBO_neighs_l[n];
+	    if (atomn != atomj) {
+	      ntype = map[type[atomn]];
+	      rln[0] = x[atoml][0]-x[atomn][0];
+	      rln[1] = x[atoml][1]-x[atomn][1];
+	      rln[2] = x[atoml][2]-x[atomn][2];
+	      rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
+	      Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
+
+	      tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag;
+	      f[atoml][0] -= tmp2*rln[0];
+	      f[atoml][1] -= tmp2*rln[1];
+	      f[atoml][2] -= tmp2*rln[2];
+	      f[atomn][0] += tmp2*rln[0];
+	      f[atomn][1] += tmp2*rln[1];
+	      f[atomn][2] += tmp2*rln[2];
+
+	      if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr);
+	    }
+	  }
+	}
+      }
+    }
+
+    if (fabs(Tij) > TOL) {
+      dN3[0] = dN3Tij[0];
+      dN3[1] = dN3Tij[1];
+      dN3[2] = dN3Tij[2];
+      atom2 = atomi;
+      atom3 = atomj;
+      r32[0] = x[atom3][0]-x[atom2][0];
+      r32[1] = x[atom3][1]-x[atom2][1];
+      r32[2] = x[atom3][2]-x[atom2][2];
+      r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2]));
+      r23[0] = -r32[0];
+      r23[1] = -r32[1];
+      r23[2] = -r32[2];
+      r23mag = r32mag;
+
+      REBO_neighs_i = REBO_firstneigh[i];
+      for (k = 0; k < REBO_numneigh[i]; k++) {
+	atomk = REBO_neighs_i[k];
+	atom1 = atomk;
+	ktype = map[type[atomk]];
+	if (atomk != atomj) {
+	  r21[0] = x[atom2][0]-x[atom1][0];
+	  r21[1] = x[atom2][1]-x[atom1][1];
+	  r21[2] = x[atom2][2]-x[atom1][2];
+	  r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]);
+	  cos321 = ((r21[0]*rij[0])+(r21[1]*rij[1])+(r21[2]*rij[2])) /
+	    (r21mag*rijmag);
+	  cos321 = MIN(cos321,1.0);
+	  cos321 = MAX(cos321,-1.0);
+	  sin321 = sqrt(1.0 - cos321*cos321);
+	  sink2i = 1.0/(sin321*sin321);
+	  rik2i = 1.0/(r21mag*r21mag);
+
+	  if (sin321 != 0.0) {
+	    rr = (rijmag*rijmag)-(r21mag*r21mag);
+	    rjk[0] = r21[0]-rij[0];
+	    rjk[1] = r21[1]-rij[1];
+	    rjk[2] = r21[2]-rij[2];
+	    rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
+	    rijrik = 2.0*rijmag*r21mag;
+	    rik2 = r21mag*r21mag;
+	    dctik = (-rr+rjk2)/(rijrik*rik2);
+	    dctij = (rr+rjk2)/(rijrik*rijmag*rijmag);
+	    dctjk = -2.0/rijrik;
+	    w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21);
+	    rikmag = r21mag;
+	    rij2 = r32mag*r32mag;
+	    rik2 = r21mag*r21mag;
+	    costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
+	    tspjik = Sp2(costmp,thmin,thmax,dtsjik);
+	    dtsjik = -dtsjik;
+
+	    REBO_neighs_j = REBO_firstneigh[j];
+	    for (l = 0; l < REBO_numneigh[j]; l++) {
+	      atoml = REBO_neighs_j[l];
+	      atom4 = atoml;
+	      ltype = map[type[atoml]];
+	      if (!(atoml == atomi || atoml == atomk)) {
+		r34[0] = x[atom3][0]-x[atom4][0];
+		r34[1] = x[atom3][1]-x[atom4][1];
+		r34[2] = x[atom3][2]-x[atom4][2];
+		r34mag = sqrt(r34[0]*r34[0] + r34[1]*r34[1] + r34[2]*r34[2]);
+		cos234 = -1.0*((rij[0]*r34[0])+(rij[1]*r34[1]) +
+			       (rij[2]*r34[2]))/(rijmag*r34mag);
+		cos234 = MIN(cos234,1.0);
+		cos234 = MAX(cos234,-1.0);
+		sin234 = sqrt(1.0 - cos234*cos234);
+		sinl2i = 1.0/(sin234*sin234);
+		rjl2i = 1.0/(r34mag*r34mag);
+
+		if (sin234 != 0.0) {
+		  w34 = Sp(r34mag,rcmin[jtype][ltype],
+			   rcmaxp[jtype][ltype],dw34);
+		  rr = (r23mag*r23mag)-(r34mag*r34mag);
+		  ril[0] = r23[0]+r34[0];
+		  ril[1] = r23[1]+r34[1];
+		  ril[2] = r23[2]+r34[2];
+		  ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
+		  rijrjl = 2.0*r23mag*r34mag;
+		  rjl2 = r34mag*r34mag;
+		  dctjl = (-rr+ril2)/(rijrjl*rjl2);
+		  dctji = (rr+ril2)/(rijrjl*r23mag*r23mag);
+		  dctil = -2.0/rijrjl;
+		  rjlmag = r34mag;
+		  rjl2 = r34mag*r34mag;
+		  costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
+		  tspijl = Sp2(costmp,thmin,thmax,dtsijl);
+		  dtsijl = -dtsijl; //need minus sign
+		  prefactor = VA*Tij;
+
+		  cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]);
+		  cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]);
+		  cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]);
+		  cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]);
+		  cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]);
+		  cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]);
+
+		  cwnum = (cross321[0]*cross234[0]) +
+		    (cross321[1]*cross234[1])+(cross321[2]*cross234[2]);
+		  cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234;
+		  om1234 = cwnum/cwnom;
+		  cw = om1234;
+		  Etmp += ((1.0-(om1234*om1234))*w21*w34) *
+		    (1.0-tspjik)*(1.0-tspijl);
+
+		  dt1dik = (rik2i)-(dctik*sink2i*cos321);
+		  dt1djk = (-dctjk*sink2i*cos321);
+		  dt1djl = (rjl2i)-(dctjl*sinl2i*cos234);
+		  dt1dil = (-dctil*sinl2i*cos234);
+		  dt1dij = (2.0/(r23mag*r23mag)) -
+		    (dctij*sink2i*cos321)-(dctji*sinl2i*cos234);
+
+		  dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]);
+		  dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]);
+		  dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]);
+
+		  dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]);
+		  dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]);
+		  dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]);
+
+		  dt2dij[0] = (r21[2]*cross234[1]) -
+		    (r34[2]*cross321[1])-(r21[1]*cross234[2]) +
+		    (r34[1]*cross321[2]);
+		  dt2dij[1] = (r21[0]*cross234[2]) -
+		    (r34[0]*cross321[2])-(r21[2]*cross234[0]) +
+		    (r34[2]*cross321[0]);
+		  dt2dij[2] = (r21[1]*cross234[0]) -
+		    (r34[1]*cross321[0])-(r21[0]*cross234[1]) +
+		    (r34[0]*cross321[1]);
+
+		  aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
+		    (1.0-tspjik)*(1.0-tspijl);
+		  aaa1 = -prefactor*(1.0-(om1234*om1234)) *
+		    (1.0-tspjik)*(1.0-tspijl);
+		  aaa2 = aaa1*w21*w34;
+		  at2 = aa*cwnum;
+
+		  fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) +
+		    (aaa2*dtsijl*dctji*(1.0-tspjik));
+		  fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl));
+		  fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik));
+		  fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl));
+		  fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik));
+
+		  F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]);
+		  F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]);
+		  F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]);
+
+		  F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]);
+		  F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]);
+		  F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]);
+
+		  F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]);
+		  F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]);
+		  F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]);
+
+		  F31[0] = (fcjkpc*rjk[0]);
+		  F31[1] = (fcjkpc*rjk[1]);
+		  F31[2] = (fcjkpc*rjk[2]);
+
+		  F24[0] = (fcilpc*ril[0]);
+		  F24[1] = (fcilpc*ril[1]);
+		  F24[2] = (fcilpc*ril[2]);
+
+		  f1[0] = -F12[0]-F31[0];
+		  f1[1] = -F12[1]-F31[1];
+		  f1[2] = -F12[2]-F31[2];
+		  f2[0] = F23[0]+F12[0]+F24[0];
+		  f2[1] = F23[1]+F12[1]+F24[1];
+		  f2[2] = F23[2]+F12[2]+F24[2];
+		  f3[0] = -F23[0]+F34[0]+F31[0];
+		  f3[1] = -F23[1]+F34[1]+F31[1];
+		  f3[2] = -F23[2]+F34[2]+F31[2];
+		  f4[0] = -F34[0]-F24[0];
+		  f4[1] = -F34[1]-F24[1];
+		  f4[2] = -F34[2]-F24[2];
+
+		  // coordination forces
+
+		  tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
+		    (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag;
+		  f2[0] -= tmp2*r21[0];
+		  f2[1] -= tmp2*r21[1];
+		  f2[2] -= tmp2*r21[2];
+		  f1[0] += tmp2*r21[0];
+		  f1[1] += tmp2*r21[1];
+		  f1[2] += tmp2*r21[2];
+
+		  tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
+		    (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag;
+		  f3[0] -= tmp2*r34[0];
+		  f3[1] -= tmp2*r34[1];
+		  f3[2] -= tmp2*r34[2];
+		  f4[0] += tmp2*r34[0];
+		  f4[1] += tmp2*r34[1];
+		  f4[2] += tmp2*r34[2];
+
+		  f[atom1][0] += f1[0]; f[atom1][1] += f1[1];
+		  f[atom1][2] += f1[2];
+		  f[atom2][0] += f2[0]; f[atom2][1] += f2[1];
+		  f[atom2][2] += f2[2];
+		  f[atom3][0] += f3[0]; f[atom3][1] += f3[1];
+		  f[atom3][2] += f3[2];
+		  f[atom4][0] += f4[0]; f[atom4][1] += f4[1];
+		  f[atom4][2] += f4[2];
+
+		  if (vflag_atom) {
+		    r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2];
+		    r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2];
+		    v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr);
+		  }
+		}
+	      }
+	    }
+	  }
+	}
+      }
+
+      REBO_neighs = REBO_firstneigh[i];
+      for (k = 0; k < REBO_numneigh[i]; k++) {
+	atomk = REBO_neighs[k];
+	if (atomk != atomj) {
+	  ktype = map[type[atomk]];
+	  rik[0] = x[atomi][0]-x[atomk][0];
+	  rik[1] = x[atomi][1]-x[atomk][1];
+	  rik[2] = x[atomi][2]-x[atomk][2];
+	  rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+	  wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+	  Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
+	    (wik*kronecker(itype,1));
+	  SpN = Sp(Nki,Nmin,Nmax,dNki);
+
+	  tmp2 = VA*dN3[0]*dwik*Etmp/rikmag;
+	  f[atomi][0] -= tmp2*rik[0];
+	  f[atomi][1] -= tmp2*rik[1];
+	  f[atomi][2] -= tmp2*rik[2];
+	  f[atomk][0] += tmp2*rik[0];
+	  f[atomk][1] += tmp2*rik[1];
+	  f[atomk][2] += tmp2*rik[2];
+
+	  if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	  tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag;
+	  f[atomi][0] -= tmp2*rik[0];
+	  f[atomi][1] -= tmp2*rik[1];
+	  f[atomi][2] -= tmp2*rik[2];
+	  f[atomk][0] += tmp2*rik[0];
+	  f[atomk][1] += tmp2*rik[1];
+	  f[atomk][2] += tmp2*rik[2];
+
+	  if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr);
+
+	  if (fabs(dNki)  >TOL) {
+	    REBO_neighs_k = REBO_firstneigh[atomk];
+	    for (n = 0; n < REBO_numneigh[atomk]; n++) {
+	      atomn = REBO_neighs_k[n];
+	      ntype = map[type[atomn]];
+	      if (atomn !=atomi) {
+		rkn[0] = x[atomk][0]-x[atomn][0];
+		rkn[1] = x[atomk][1]-x[atomn][1];
+		rkn[2] = x[atomk][2]-x[atomn][2];
+		rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
+		Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
+
+		tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag;
+		f[atomk][0] -= tmp2*rkn[0];
+		f[atomk][1] -= tmp2*rkn[1];
+		f[atomk][2] -= tmp2*rkn[2];
+		f[atomn][0] += tmp2*rkn[0];
+		f[atomn][1] += tmp2*rkn[1];
+		f[atomn][2] += tmp2*rkn[2];
+
+		if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr);
+	      }
+	    }
+	  }
+	}
+      }
+
+      // Tij forces
+
+      REBO_neighs = REBO_firstneigh[j];
+      for (l = 0; l < REBO_numneigh[j]; l++) {
+	atoml = REBO_neighs[l];
+	if (atoml != atomi) {
+	  ltype = map[type[atoml]];
+	  rjl[0] = x[atomj][0]-x[atoml][0];
+	  rjl[1] = x[atomj][1]-x[atoml][1];
+	  rjl[2] = x[atomj][2]-x[atoml][2];
+	  rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+	  wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+	  Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
+	    (wjl*kronecker(jtype,1));
+	  SpN = Sp(Nlj,Nmin,Nmax,dNlj);
+
+	  tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag;
+	  f[atomj][0] -= tmp2*rjl[0];
+	  f[atomj][1] -= tmp2*rjl[1];
+	  f[atomj][2] -= tmp2*rjl[2];
+	  f[atoml][0] += tmp2*rjl[0];
+	  f[atoml][1] += tmp2*rjl[1];
+	  f[atoml][2] += tmp2*rjl[2];
+
+	  if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	  tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag;
+	  f[atomj][0] -= tmp2*rjl[0];
+	  f[atomj][1] -= tmp2*rjl[1];
+	  f[atomj][2] -= tmp2*rjl[2];
+	  f[atoml][0] += tmp2*rjl[0];
+	  f[atoml][1] += tmp2*rjl[1];
+	  f[atoml][2] += tmp2*rjl[2];
+
+	  if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr);
+
+	  if (fabs(dNlj) > TOL) {
+	    REBO_neighs_l = REBO_firstneigh[atoml];
+	    for (n = 0; n < REBO_numneigh[atoml]; n++) {
+	      atomn = REBO_neighs_l[n];
+	      ntype = map[type[atomn]];
+	      if (atomn != atomj) {
+		rln[0] = x[atoml][0]-x[atomn][0];
+		rln[1] = x[atoml][1]-x[atomn][1];
+		rln[2] = x[atoml][2]-x[atomn][2];
+		rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
+		Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
+
+		tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag;
+		f[atoml][0] -= tmp2*rln[0];
+		f[atoml][1] -= tmp2*rln[1];
+		f[atoml][2] -= tmp2*rln[2];
+		f[atomn][0] += tmp2*rln[0];
+		f[atomn][1] += tmp2*rln[1];
+		f[atomn][2] += tmp2*rln[2];
+
+		if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr);
+	      }
+	    }
+	  }
+	}
+      }
+    }
+  }
+
+  return Stb;
+}
+
+/* ----------------------------------------------------------------------
+   REBO forces and energy
+------------------------------------------------------------------------- */
+
+void PairAIREBOOMP::FREBO_thr(int ifrom, int ito, int evflag, int eflag,
+			      int vflag_atom, ThrData * const thr)
+{
+  int i,j,k,m,ii,itype,jtype,itag,jtag;
+  double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp;
+  double rsq,rij,wij;
+  double Qij,Aij,alphaij,VR,pre,dVRdi,VA,term,bij,dVAdi,dVA;
+  double dwij,del[3];
+  int *ilist,*REBO_neighs;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int *type = atom->type;
+  int *tag = atom->tag;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+
+  // two-body interactions from REBO neighbor list, skip half of them
+
+  for (ii = ifrom; ii < ito; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    REBO_neighs = REBO_firstneigh[i];
+
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      j = REBO_neighs[k];
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = x[i][0] - x[j][0];
+      dely = x[i][1] - x[j][1];
+      delz = x[i][2] - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      rij = sqrt(rsq);
+      wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+      if (wij <= TOL) continue;
+
+      Qij = Q[itype][jtype];
+      Aij = A[itype][jtype];
+      alphaij = alpha[itype][jtype];
+
+      VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij);
+      pre = wij*Aij * exp(-alphaij*rij);
+      dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij));
+      dVRdi += VR/wij * dwij;
+
+      VA = dVA = 0.0;
+      for (m = 0; m < 3; m++) {
+	term = -wij * BIJc[itype][jtype][m] * exp(-Beta[itype][jtype][m]*rij);
+	VA += term;
+	dVA += -Beta[itype][jtype][m] * term;
+      }
+      dVA += VA/wij * dwij;
+      del[0] = delx;
+      del[1] = dely;
+      del[2] = delz;
+      bij = bondorder_thr(i,j,del,rij,VA,vflag_atom,thr);
+      dVAdi = bij*dVA;
+
+      fpair = -(dVRdi+dVAdi) / rij;
+      f[i][0] += delx*fpair;
+      f[i][1] += dely*fpair;
+      f[i][2] += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (eflag) evdwl = VR + bij*VA;
+      if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ forces and energy
+   find 3- and 4-step paths between atoms I,J via REBO neighbor lists
+------------------------------------------------------------------------- */
+
+void PairAIREBOOMP::FLJ_thr(int ifrom, int ito, int evflag, int eflag,
+			    int vflag_atom, ThrData * const thr)
+{
+  int i,j,k,m,ii,jj,kk,mm,jnum,itype,jtype,ktype,mtype,itag,jtag;
+  int atomi,atomj,atomk,atomm;
+  int testpath,npath,done;
+  double evdwl,fpair,xtmp,ytmp,ztmp;
+  double rsq,best,wik,wkm,cij,rij,dwij,dwik,dwkj,dwkm,dwmj;
+  double delij[3],rijsq,delik[3],rik,deljk[3];
+  double rkj,wkj,dC,VLJ,dVLJ,VA,Str,dStr,Stb;
+  double vdw,slw,dvdw,dslw,drij,swidth,tee,tee2;
+  double rljmin,rljmax,sigcut,sigmin,sigwid;
+  double delkm[3],rkm,deljm[3],rmj,wmj,r2inv,r6inv,scale,delscale[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int *REBO_neighs_i,*REBO_neighs_k;
+  double delikS[3],deljkS[3],delkmS[3],deljmS[3],delimS[3];
+  double rikS,rkjS,rkmS,rmjS,wikS,dwikS;
+  double wkjS,dwkjS,wkmS,dwkmS,wmjS,dwmjS;
+  double fpair1,fpair2,fpair3;
+  double fi[3],fj[3],fk[3],fm[3];
+
+  // I-J interaction from full neighbor list
+  // skip 1/2 of interactions since only consider each pair once
+
+  evdwl = 0.0;
+  rljmin = 0.0;
+  rljmax = 0.0;
+  sigcut = 0.0;
+  sigmin = 0.0;
+  sigwid = 0.0;
+
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = ifrom; ii < ito; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    atomi = i;
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+      atomj = j;
+
+      delij[0] = xtmp - x[j][0];
+      delij[1] = ytmp - x[j][1];
+      delij[2] = ztmp - x[j][2];
+      rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
+
+      // if outside of LJ cutoff, skip
+      // if outside of 4-path cutoff, best = 0.0, no need to test paths
+      // if outside of 2-path cutoff but inside 4-path cutoff,
+      //   best = 0.0, test 3-,4-paths
+      // if inside 2-path cutoff, best = wij, only test 3-,4-paths if best < 1
+
+      if (rijsq >= cutljsq[itype][jtype]) continue;
+      rij = sqrt(rijsq);
+      if (rij >= cut3rebo) {
+	best = 0.0;
+	testpath = 0;
+      } else if (rij >= rcmax[itype][jtype]) {
+	best = 0.0;
+	testpath = 1;
+      } else {
+	best = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+	npath = 2;
+	if (best < 1.0) testpath = 1;
+	else testpath = 0;
+      }
+
+      done = 0;
+      if (testpath) {
+
+	// test all 3-body paths = I-K-J
+	// I-K interactions come from atom I's REBO neighbors
+	// if wik > current best, compute wkj
+	// if best = 1.0, done
+
+	REBO_neighs_i = REBO_firstneigh[i];
+	for (kk = 0; kk < REBO_numneigh[i] && done==0; kk++) {
+	  k = REBO_neighs_i[kk];
+	  if (k == j) continue;
+	  ktype = map[type[k]];
+
+	  delik[0] = x[i][0] - x[k][0];
+	  delik[1] = x[i][1] - x[k][1];
+	  delik[2] = x[i][2] - x[k][2];
+	  rsq = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2];
+	  if (rsq < rcmaxsq[itype][ktype]) {
+	    rik = sqrt(rsq);
+	    wik = Sp(rik,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+	  } else wik = 0.0;
+
+	  if (wik > best) {
+	    deljk[0] = x[j][0] - x[k][0];
+	    deljk[1] = x[j][1] - x[k][1];
+	    deljk[2] = x[j][2] - x[k][2];
+	    rsq = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2];
+	    if (rsq < rcmaxsq[ktype][jtype]) {
+	      rkj = sqrt(rsq);
+	      wkj = Sp(rkj,rcmin[ktype][jtype],rcmax[ktype][jtype],dwkj);
+	      if (wik*wkj > best) {
+		best = wik*wkj;
+		npath = 3;
+ 		atomk = k;
+            	delikS[0] = delik[0];
+            	delikS[1] = delik[1];
+            	delikS[2] = delik[2];
+	    	rikS = rik;
+	    	wikS = wik;
+	    	dwikS = dwik;
+            	deljkS[0] = deljk[0];
+            	deljkS[1] = deljk[1];
+            	deljkS[2] = deljk[2];
+	    	rkjS = rkj;
+	    	wkjS = wkj;
+	    	dwkjS = dwkj;
+		if (best == 1.0) {
+		  done = 1;
+		  break;
+		}
+	      }
+	    }
+
+	    // test all 4-body paths = I-K-M-J
+	    // K-M interactions come from atom K's REBO neighbors
+	    // if wik*wkm > current best, compute wmj
+	    // if best = 1.0, done
+
+	    REBO_neighs_k = REBO_firstneigh[k];
+	    for (mm = 0; mm < REBO_numneigh[k] && done==0; mm++) {
+	      m = REBO_neighs_k[mm];
+	      if (m == i || m == j) continue;
+	      mtype = map[type[m]];
+	      delkm[0] = x[k][0] - x[m][0];
+	      delkm[1] = x[k][1] - x[m][1];
+	      delkm[2] = x[k][2] - x[m][2];
+	      rsq = delkm[0]*delkm[0] + delkm[1]*delkm[1] + delkm[2]*delkm[2];
+	      if (rsq < rcmaxsq[ktype][mtype]) {
+		rkm = sqrt(rsq);
+		wkm = Sp(rkm,rcmin[ktype][mtype],rcmax[ktype][mtype],dwkm);
+	      } else wkm = 0.0;
+
+	      if (wik*wkm > best) {
+		deljm[0] = x[j][0] - x[m][0];
+		deljm[1] = x[j][1] - x[m][1];
+		deljm[2] = x[j][2] - x[m][2];
+		rsq = deljm[0]*deljm[0] + deljm[1]*deljm[1] +
+		  deljm[2]*deljm[2];
+		if (rsq < rcmaxsq[mtype][jtype]) {
+		  rmj = sqrt(rsq);
+		  wmj = Sp(rmj,rcmin[mtype][jtype],rcmax[mtype][jtype],dwmj);
+		  if (wik*wkm*wmj > best) {
+		    best = wik*wkm*wmj;
+		    npath = 4;
+		    atomk = k;
+		    delikS[0] = delik[0];
+	            delikS[1] = delik[1];
+        	    delikS[2] = delik[2];
+		    rikS = rik;
+		    wikS = wik;
+		    dwikS = dwik;
+		    atomm = m;
+	            delkmS[0] = delkm[0];
+        	    delkmS[1] = delkm[1];
+            	    delkmS[2] = delkm[2];
+		    rkmS = rkm;
+		    wkmS = wkm;
+		    dwkmS = dwkm;
+	            deljmS[0] = deljm[0];
+        	    deljmS[1] = deljm[1];
+           	    deljmS[2] = deljm[2];
+		    rmjS = rmj;
+		    wmjS = wmj;
+		    dwmjS = dwmj;
+		    if (best == 1.0) {
+		      done = 1;
+		      break;
+		    }
+		  }
+		}
+	      }
+	    }
+	  }
+	}
+      }
+
+      cij = 1.0 - best;
+      if (cij == 0.0) continue;
+
+      // compute LJ forces and energy
+
+      sigwid = 0.84;
+      sigcut = 3.0;
+      sigmin = sigcut - sigwid;
+
+      rljmin = sigma[itype][jtype];
+      rljmax = sigcut * rljmin;
+      rljmin = sigmin * rljmin;
+
+      if (rij > rljmax){
+         slw = 0.0;
+         dslw = 0.0;}
+      else if (rij > rljmin){
+         drij = rij - rljmin;
+         swidth = rljmax - rljmin;
+         tee = drij / swidth;
+         tee2 = tee*tee;
+         slw = 1.0 - tee2 * (3.0 - 2.0 * tee);
+         dslw = 6.0 * tee * (1.0 - tee) / rij / swidth;
+      }
+      else
+         slw = 1.0;
+         dslw = 0.0;
+
+      r2inv = 1.0/rijsq;
+      r6inv = r2inv*r2inv*r2inv;
+
+      vdw = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+      dvdw = -r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) / rij;
+
+      // VLJ now becomes vdw * slw, derivaties, etc.
+
+      VLJ = vdw * slw;
+      dVLJ = dvdw * slw + vdw * dslw;
+
+      Str = Sp2(rij,rcLJmin[itype][jtype],rcLJmax[itype][jtype],dStr);
+      VA = Str*cij*VLJ;
+      if (Str > 0.0) {
+	scale = rcmin[itype][jtype] / rij;
+	delscale[0] = scale * delij[0];
+	delscale[1] = scale * delij[1];
+	delscale[2] = scale * delij[2];
+	Stb = bondorderLJ_thr(i,j,delscale,rcmin[itype][jtype],VA,
+			      delij,rij,vflag_atom,thr);
+      } else Stb = 0.0;
+
+      fpair = -(dStr * (Stb*cij*VLJ - cij*VLJ) +
+		dVLJ * (Str*Stb*cij + cij - Str*cij)) / rij;
+
+      f[i][0] += delij[0]*fpair;
+      f[i][1] += delij[1]*fpair;
+      f[i][2] += delij[2]*fpair;
+      f[j][0] -= delij[0]*fpair;
+      f[j][1] -= delij[1]*fpair;
+      f[j][2] -= delij[2]*fpair;
+
+      if (eflag) evdwl = VA*Stb + (1.0-Str)*cij*VLJ;
+      if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+			       evdwl,0.0,fpair,delij[0],delij[1],delij[2],thr);
+
+      if (cij < 1.0) {
+	dC = Str*Stb*VLJ + (1.0-Str)*VLJ;
+	if (npath == 2) {
+	  fpair = dC*dwij / rij;
+	  f[atomi][0] += delij[0]*fpair;
+	  f[atomi][1] += delij[1]*fpair;
+	  f[atomi][2] += delij[2]*fpair;
+	  f[atomj][0] -= delij[0]*fpair;
+	  f[atomj][1] -= delij[1]*fpair;
+	  f[atomj][2] -= delij[2]*fpair;
+
+	  if (vflag_atom) v_tally2_thr(atomi,atomj,fpair,delij,thr);
+
+	} else if (npath == 3) {
+	  fpair1 = dC*dwikS*wkjS / rikS;
+	  fi[0] = delikS[0]*fpair1;
+	  fi[1] = delikS[1]*fpair1;
+	  fi[2] = delikS[2]*fpair1;
+	  fpair2 = dC*wikS*dwkjS / rkjS;
+	  fj[0] = deljkS[0]*fpair2;
+	  fj[1] = deljkS[1]*fpair2;
+	  fj[2] = deljkS[2]*fpair2;
+
+	  f[atomi][0] += fi[0];
+	  f[atomi][1] += fi[1];
+	  f[atomi][2] += fi[2];
+	  f[atomj][0] += fj[0];
+	  f[atomj][1] += fj[1];
+	  f[atomj][2] += fj[2];
+	  f[atomk][0] -= fi[0] + fj[0];
+	  f[atomk][1] -= fi[1] + fj[1];
+	  f[atomk][2] -= fi[2] + fj[2];
+
+	  if (vflag_atom)
+	    v_tally3_thr(atomi,atomj,atomk,fi,fj,delikS,deljkS,thr);
+
+	} else {
+	  fpair1 = dC*dwikS*wkmS*wmjS / rikS;
+	  fi[0] = delikS[0]*fpair1;
+	  fi[1] = delikS[1]*fpair1;
+	  fi[2] = delikS[2]*fpair1;
+
+	  fpair2 = dC*wikS*dwkmS*wmjS / rkmS;
+	  fk[0] = delkmS[0]*fpair2 - fi[0];
+	  fk[1] = delkmS[1]*fpair2 - fi[1];
+	  fk[2] = delkmS[2]*fpair2 - fi[2];
+
+	  fpair3 = dC*wikS*wkmS*dwmjS / rmjS;
+	  fj[0] = deljmS[0]*fpair3;
+	  fj[1] = deljmS[1]*fpair3;
+	  fj[2] = deljmS[2]*fpair3;
+
+	  fm[0] = -delkmS[0]*fpair2 - fj[0];
+	  fm[1] = -delkmS[1]*fpair2 - fj[1];
+	  fm[2] = -delkmS[2]*fpair2 - fj[2];
+
+	  f[atomi][0] += fi[0];
+	  f[atomi][1] += fi[1];
+	  f[atomi][2] += fi[2];
+	  f[atomj][0] += fj[0];
+	  f[atomj][1] += fj[1];
+	  f[atomj][2] += fj[2];
+	  f[atomk][0] += fk[0];
+	  f[atomk][1] += fk[1];
+	  f[atomk][2] += fk[2];
+	  f[atomm][0] += fm[0];
+	  f[atomm][1] += fm[1];
+	  f[atomm][2] += fm[2];
+
+	  if (vflag_atom) {
+	    delimS[0] = delikS[0] + delkmS[0];
+	    delimS[1] = delikS[1] + delkmS[1];
+	    delimS[2] = delikS[2] + delkmS[2];
+	    v_tally4_thr(atomi,atomj,atomk,atomm,fi,fj,fk,delimS,deljmS,delkmS,thr);
+	  }
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   torsional forces and energy
+------------------------------------------------------------------------- */
+
+void PairAIREBOOMP::TORSION_thr(int ifrom, int ito,
+				int evflag, int eflag, ThrData * const thr)
+{
+  int i,j,k,l,ii,itag,jtag;
+  double evdwl,fpair,xtmp,ytmp,ztmp;
+  double cos321;
+  double w21,dw21,cos234,w34,dw34;
+  double cross321[3],cross321mag,cross234[3],cross234mag;
+  double w23,dw23,cw2,ekijl,Ec;
+  double cw,cwnum,cwnom;
+  double rij,rij2,rik,rjl,tspjik,dtsjik,tspijl,dtsijl,costmp,fcpc;
+  double sin321,sin234,rjk2,rik2,ril2,rjl2;
+  double rjk,ril;
+  double Vtors;
+  double dndij[3],tmpvec[3],dndik[3],dndjl[3];
+  double dcidij,dcidik,dcidjk,dcjdji,dcjdjl,dcjdil;
+  double dsidij,dsidik,dsidjk,dsjdji,dsjdjl,dsjdil;
+  double dxidij,dxidik,dxidjk,dxjdji,dxjdjl,dxjdil;
+  double ddndij,ddndik,ddndjk,ddndjl,ddndil,dcwddn,dcwdn,dvpdcw,Ftmp[3];
+  double del32[3],rsq,r32,del23[3],del21[3],r21;
+  double deljk[3],del34[3],delil[3],delkl[3],r23,r34;
+  double fi[3],fj[3],fk[3],fl[3];
+  int itype,jtype,ktype,ltype,kk,ll,jj;
+  int *ilist,*REBO_neighs_i,*REBO_neighs_j;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int *type = atom->type;
+  int *tag = atom->tag;
+
+  ilist = list->ilist;
+
+  for (ii = ifrom; ii < ito; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    if (itype != 0) continue;
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    REBO_neighs_i = REBO_firstneigh[i];
+
+    for (jj = 0; jj < REBO_numneigh[i]; jj++) {
+      j = REBO_neighs_i[jj];
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+      if (jtype != 0) continue;
+
+      del32[0] = x[j][0]-x[i][0];
+      del32[1] = x[j][1]-x[i][1];
+      del32[2] = x[j][2]-x[i][2];
+      rsq = del32[0]*del32[0] + del32[1]*del32[1] + del32[2]*del32[2];
+      r32 = sqrt(rsq);
+      del23[0] = -del32[0];
+      del23[1] = -del32[1];
+      del23[2] = -del32[2];
+      r23 = r32;
+      w23 = Sp(r23,rcmin[itype][jtype],rcmax[itype][jtype],dw23);
+
+      for (kk = 0; kk < REBO_numneigh[i]; kk++) {
+	k = REBO_neighs_i[kk];
+	ktype = map[type[k]];
+	if (k == j) continue;
+	del21[0] = x[i][0]-x[k][0];
+	del21[1] = x[i][1]-x[k][1];
+	del21[2] = x[i][2]-x[k][2];
+	rsq = del21[0]*del21[0] + del21[1]*del21[1] + del21[2]*del21[2];
+	r21 = sqrt(rsq);
+	cos321 = - ((del21[0]*del32[0]) + (del21[1]*del32[1]) +
+		    (del21[2]*del32[2])) / (r21*r32);
+	cos321 = MIN(cos321,1.0);
+	cos321 = MAX(cos321,-1.0);
+	sin321 = sqrt(1.0 - cos321*cos321);
+	if (sin321 < TOL) continue;
+
+	deljk[0] = del21[0]-del23[0];
+	deljk[1] = del21[1]-del23[1];
+	deljk[2] = del21[2]-del23[2];
+	rjk2 = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2];
+	rjk=sqrt(rjk2);
+	rik2 = r21*r21;
+	w21 = Sp(r21,rcmin[itype][ktype],rcmax[itype][ktype],dw21);
+
+	rij = r32;
+	rik = r21;
+	rij2 = r32*r32;
+	rik2 = r21*r21;
+	costmp = 0.5*(rij2+rik2-rjk2)/rij/rik;
+	tspjik = Sp2(costmp,thmin,thmax,dtsjik);
+	dtsjik = -dtsjik;
+
+	REBO_neighs_j = REBO_firstneigh[j];
+	for (ll = 0; ll < REBO_numneigh[j]; ll++) {
+	  l = REBO_neighs_j[ll];
+	  ltype = map[type[l]];
+	  if (l == i || l == k) continue;
+	  del34[0] = x[j][0]-x[l][0];
+	  del34[1] = x[j][1]-x[l][1];
+	  del34[2] = x[j][2]-x[l][2];
+	  rsq = del34[0]*del34[0] + del34[1]*del34[1] + del34[2]*del34[2];
+	  r34 = sqrt(rsq);
+	  cos234 = (del32[0]*del34[0] + del32[1]*del34[1] +
+		    del32[2]*del34[2]) / (r32*r34);
+	  cos234 = MIN(cos234,1.0);
+	  cos234 = MAX(cos234,-1.0);
+	  sin234 = sqrt(1.0 - cos234*cos234);
+	  if (sin234 < TOL) continue;
+	  w34 = Sp(r34,rcmin[jtype][ltype],rcmax[jtype][ltype],dw34);
+	  delil[0] = del23[0] + del34[0];
+	  delil[1] = del23[1] + del34[1];
+	  delil[2] = del23[2] + del34[2];
+	  ril2 = delil[0]*delil[0] + delil[1]*delil[1] + delil[2]*delil[2];
+	  ril=sqrt(ril2);
+	  rjl2 = r34*r34;
+
+	  rjl = r34;
+	  rjl2 = r34*r34;
+	  costmp = 0.5*(rij2+rjl2-ril2)/rij/rjl;
+	  tspijl = Sp2(costmp,thmin,thmax,dtsijl);
+	  dtsijl = -dtsijl; //need minus sign
+	  cross321[0] = (del32[1]*del21[2])-(del32[2]*del21[1]);
+	  cross321[1] = (del32[2]*del21[0])-(del32[0]*del21[2]);
+	  cross321[2] = (del32[0]*del21[1])-(del32[1]*del21[0]);
+	  cross321mag = sqrt(cross321[0]*cross321[0]+
+			     cross321[1]*cross321[1]+
+			     cross321[2]*cross321[2]);
+	  cross234[0] = (del23[1]*del34[2])-(del23[2]*del34[1]);
+	  cross234[1] = (del23[2]*del34[0])-(del23[0]*del34[2]);
+	  cross234[2] = (del23[0]*del34[1])-(del23[1]*del34[0]);
+	  cross234mag = sqrt(cross234[0]*cross234[0]+
+			     cross234[1]*cross234[1]+
+			     cross234[2]*cross234[2]);
+	  cwnum = (cross321[0]*cross234[0]) +
+	    (cross321[1]*cross234[1])+(cross321[2]*cross234[2]);
+	  cwnom = r21*r34*r32*r32*sin321*sin234;
+	  cw = cwnum/cwnom;
+
+	  cw2 = (.5*(1.0-cw));
+	  ekijl = epsilonT[ktype][ltype];
+	  Ec = 256.0*ekijl/405.0;
+	  Vtors = (Ec*(pow(cw2,5.0)))-(ekijl/10.0);
+
+	  if (eflag) evdwl = Vtors*w21*w23*w34*(1.0-tspjik)*(1.0-tspijl);
+
+	  dndij[0] = (cross234[1]*del21[2])-(cross234[2]*del21[1]);
+	  dndij[1] = (cross234[2]*del21[0])-(cross234[0]*del21[2]);
+	  dndij[2] = (cross234[0]*del21[1])-(cross234[1]*del21[0]);
+
+	  tmpvec[0] = (del34[1]*cross321[2])-(del34[2]*cross321[1]);
+	  tmpvec[1] = (del34[2]*cross321[0])-(del34[0]*cross321[2]);
+	  tmpvec[2] = (del34[0]*cross321[1])-(del34[1]*cross321[0]);
+
+	  dndij[0] = dndij[0]+tmpvec[0];
+	  dndij[1] = dndij[1]+tmpvec[1];
+	  dndij[2] = dndij[2]+tmpvec[2];
+
+	  dndik[0] = (del23[1]*cross234[2])-(del23[2]*cross234[1]);
+	  dndik[1] = (del23[2]*cross234[0])-(del23[0]*cross234[2]);
+	  dndik[2] = (del23[0]*cross234[1])-(del23[1]*cross234[0]);
+
+	  dndjl[0] = (cross321[1]*del23[2])-(cross321[2]*del23[1]);
+	  dndjl[1] = (cross321[2]*del23[0])-(cross321[0]*del23[2]);
+	  dndjl[2] = (cross321[0]*del23[1])-(cross321[1]*del23[0]);
+
+	  dcidij = ((r23*r23)-(r21*r21)+(rjk*rjk))/(2.0*r23*r23*r21);
+	  dcidik = ((r21*r21)-(r23*r23)+(rjk*rjk))/(2.0*r23*r21*r21);
+	  dcidjk = (-rjk)/(r23*r21);
+	  dcjdji = ((r23*r23)-(r34*r34)+(ril*ril))/(2.0*r23*r23*r34);
+	  dcjdjl = ((r34*r34)-(r23*r23)+(ril*ril))/(2.0*r23*r34*r34);
+	  dcjdil = (-ril)/(r23*r34);
+
+	  dsidij = (-cos321/sin321)*dcidij;
+	  dsidik = (-cos321/sin321)*dcidik;
+	  dsidjk = (-cos321/sin321)*dcidjk;
+
+	  dsjdji = (-cos234/sin234)*dcjdji;
+	  dsjdjl = (-cos234/sin234)*dcjdjl;
+	  dsjdil = (-cos234/sin234)*dcjdil;
+
+	  dxidij = (r21*sin321)+(r23*r21*dsidij);
+	  dxidik = (r23*sin321)+(r23*r21*dsidik);
+	  dxidjk = (r23*r21*dsidjk);
+
+	  dxjdji = (r34*sin234)+(r23*r34*dsjdji);
+	  dxjdjl = (r23*sin234)+(r23*r34*dsjdjl);
+	  dxjdil = (r23*r34*dsjdil);
+
+	  ddndij = (dxidij*cross234mag)+(cross321mag*dxjdji);
+	  ddndik = dxidik*cross234mag;
+	  ddndjk = dxidjk*cross234mag;
+	  ddndjl = cross321mag*dxjdjl;
+	  ddndil = cross321mag*dxjdil;
+	  dcwddn = -cwnum/(cwnom*cwnom);
+	  dcwdn = 1.0/cwnom;
+	  dvpdcw = (-1.0)*Ec*(-.5)*5.0*pow(cw2,4.0) *
+	    w23*w21*w34*(1.0-tspjik)*(1.0-tspijl);
+
+	  Ftmp[0] = dvpdcw*((dcwdn*dndij[0])+(dcwddn*ddndij*del23[0]/r23));
+	  Ftmp[1] = dvpdcw*((dcwdn*dndij[1])+(dcwddn*ddndij*del23[1]/r23));
+	  Ftmp[2] = dvpdcw*((dcwdn*dndij[2])+(dcwddn*ddndij*del23[2]/r23));
+	  fi[0] = Ftmp[0];
+	  fi[1] = Ftmp[1];
+	  fi[2] = Ftmp[2];
+	  fj[0] = -Ftmp[0];
+	  fj[1] = -Ftmp[1];
+	  fj[2] = -Ftmp[2];
+
+	  Ftmp[0] = dvpdcw*((dcwdn*dndik[0])+(dcwddn*ddndik*del21[0]/r21));
+	  Ftmp[1] = dvpdcw*((dcwdn*dndik[1])+(dcwddn*ddndik*del21[1]/r21));
+	  Ftmp[2] = dvpdcw*((dcwdn*dndik[2])+(dcwddn*ddndik*del21[2]/r21));
+	  fi[0] += Ftmp[0];
+	  fi[1] += Ftmp[1];
+	  fi[2] += Ftmp[2];
+	  fk[0] = -Ftmp[0];
+	  fk[1] = -Ftmp[1];
+	  fk[2] = -Ftmp[2];
+
+	  Ftmp[0] = (dvpdcw*dcwddn*ddndjk*deljk[0])/rjk;
+	  Ftmp[1] = (dvpdcw*dcwddn*ddndjk*deljk[1])/rjk;
+	  Ftmp[2] = (dvpdcw*dcwddn*ddndjk*deljk[2])/rjk;
+	  fj[0] += Ftmp[0];
+	  fj[1] += Ftmp[1];
+	  fj[2] += Ftmp[2];
+	  fk[0] -= Ftmp[0];
+	  fk[1] -= Ftmp[1];
+	  fk[2] -= Ftmp[2];
+
+	  Ftmp[0] = dvpdcw*((dcwdn*dndjl[0])+(dcwddn*ddndjl*del34[0]/r34));
+	  Ftmp[1] = dvpdcw*((dcwdn*dndjl[1])+(dcwddn*ddndjl*del34[1]/r34));
+	  Ftmp[2] = dvpdcw*((dcwdn*dndjl[2])+(dcwddn*ddndjl*del34[2]/r34));
+	  fj[0] += Ftmp[0];
+	  fj[1] += Ftmp[1];
+	  fj[2] += Ftmp[2];
+	  fl[0] = -Ftmp[0];
+	  fl[1] = -Ftmp[1];
+	  fl[2] = -Ftmp[2];
+
+	  Ftmp[0] = (dvpdcw*dcwddn*ddndil*delil[0])/ril;
+	  Ftmp[1] = (dvpdcw*dcwddn*ddndil*delil[1])/ril;
+	  Ftmp[2] = (dvpdcw*dcwddn*ddndil*delil[2])/ril;
+	  fi[0] += Ftmp[0];
+	  fi[1] += Ftmp[1];
+	  fi[2] += Ftmp[2];
+	  fl[0] -= Ftmp[0];
+	  fl[1] -= Ftmp[1];
+	  fl[2] -= Ftmp[2];
+
+	  // coordination forces
+
+	  fpair = Vtors*dw21*w23*w34*(1.0-tspjik)*(1.0-tspijl) / r21;
+	  fi[0] -= del21[0]*fpair;
+	  fi[1] -= del21[1]*fpair;
+	  fi[2] -= del21[2]*fpair;
+	  fk[0] += del21[0]*fpair;
+	  fk[1] += del21[1]*fpair;
+	  fk[2] += del21[2]*fpair;
+
+	  fpair = Vtors*w21*dw23*w34*(1.0-tspjik)*(1.0-tspijl) / r23;
+	  fi[0] -= del23[0]*fpair;
+	  fi[1] -= del23[1]*fpair;
+	  fi[2] -= del23[2]*fpair;
+	  fj[0] += del23[0]*fpair;
+	  fj[1] += del23[1]*fpair;
+	  fj[2] += del23[2]*fpair;
+
+	  fpair = Vtors*w21*w23*dw34*(1.0-tspjik)*(1.0-tspijl) / r34;
+	  fj[0] -= del34[0]*fpair;
+	  fj[1] -= del34[1]*fpair;
+	  fj[2] -= del34[2]*fpair;
+	  fl[0] += del34[0]*fpair;
+	  fl[1] += del34[1]*fpair;
+	  fl[2] += del34[2]*fpair;
+
+	  // additional cut off function forces
+
+	  fcpc = -Vtors*w21*w23*w34*dtsjik*(1.0-tspijl);
+	  fpair = fcpc*dcidij/rij;
+	  fi[0] += fpair*del23[0];
+	  fi[1] += fpair*del23[1];
+	  fi[2] += fpair*del23[2];
+	  fj[0] -= fpair*del23[0];
+	  fj[1] -= fpair*del23[1];
+	  fj[2] -= fpair*del23[2];
+
+	  fpair = fcpc*dcidik/rik;
+	  fi[0] += fpair*del21[0];
+	  fi[1] += fpair*del21[1];
+	  fi[2] += fpair*del21[2];
+	  fk[0] -= fpair*del21[0];
+	  fk[1] -= fpair*del21[1];
+	  fk[2] -= fpair*del21[2];
+
+	  fpair = fcpc*dcidjk/rjk;
+	  fj[0] += fpair*deljk[0];
+	  fj[1] += fpair*deljk[1];
+	  fj[2] += fpair*deljk[2];
+	  fk[0] -= fpair*deljk[0];
+	  fk[1] -= fpair*deljk[1];
+	  fk[2] -= fpair*deljk[2];
+
+	  fcpc = -Vtors*w21*w23*w34*(1.0-tspjik)*dtsijl;
+	  fpair = fcpc*dcjdji/rij;
+	  fi[0] += fpair*del23[0];
+	  fi[1] += fpair*del23[1];
+	  fi[2] += fpair*del23[2];
+	  fj[0] -= fpair*del23[0];
+	  fj[1] -= fpair*del23[1];
+	  fj[2] -= fpair*del23[2];
+
+	  fpair = fcpc*dcjdjl/rjl;
+	  fj[0] += fpair*del34[0];
+	  fj[1] += fpair*del34[1];
+	  fj[2] += fpair*del34[2];
+	  fl[0] -= fpair*del34[0];
+	  fl[1] -= fpair*del34[1];
+	  fl[2] -= fpair*del34[2];
+
+	  fpair = fcpc*dcjdil/ril;
+	  fi[0] += fpair*delil[0];
+	  fi[1] += fpair*delil[1];
+	  fi[2] += fpair*delil[2];
+	  fl[0] -= fpair*delil[0];
+	  fl[1] -= fpair*delil[1];
+	  fl[2] -= fpair*delil[2];
+
+	  // sum per-atom forces into atom force array
+
+	  f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2];
+	  f[j][0] += fj[0]; f[j][1] += fj[1]; f[j][2] += fj[2];
+	  f[k][0] += fk[0]; f[k][1] += fk[1]; f[k][2] += fk[2];
+	  f[l][0] += fl[0]; f[l][1] += fl[1]; f[l][2] += fl[2];
+
+	  if (evflag) {
+	    delkl[0] = delil[0] - del21[0];
+	    delkl[1] = delil[1] - del21[1];
+	    delkl[2] = delil[2] - del21[2];
+	    ev_tally4_thr(this,i,j,k,l,evdwl,fi,fj,fk,delil,del34,delkl,thr);
+	  }
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   create REBO neighbor list from main neighbor list
+   REBO neighbor list stores neighbors of ghost atoms
+------------------------------------------------------------------------- */
+
+void PairAIREBOOMP::REBO_neigh_thr()
+{
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+
+  if (atom->nmax > maxlocal) {
+    maxlocal = atom->nmax;
+    memory->destroy(REBO_numneigh);
+    memory->sfree(REBO_firstneigh);
+    memory->destroy(nC);
+    memory->destroy(nH);
+    memory->create(REBO_numneigh,maxlocal,"AIREBO:numneigh");
+    REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *),
+					       "AIREBO:firstneigh");
+    memory->create(nC,maxlocal,"AIREBO:nC");
+    memory->create(nH,maxlocal,"AIREBO:nH");
+  }
+
+  if (nthreads > maxpage)
+    add_pages(nthreads - maxpage);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int i,j,ii,jj,n,jnum,itype,jtype;
+    double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *neighptr;
+
+    double **x = atom->x;
+    int *type = atom->type;
+
+    const int allnum = list->inum + list->gnum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    const int iidelta = 1 + allnum/nthreads;
+    const int iifrom = tid*iidelta;
+    int iito   = iifrom + iidelta;
+    if (iito > allnum)
+      iito = allnum;
+
+    // store all REBO neighs of owned and ghost atoms
+    // scan full neighbor list of I
+
+    int npage = tid;
+    int npnt = 0;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      if (pgsize - npnt < oneatom) {
+	npnt = 0;
+	npage += nthreads;
+	if (npage >= maxpage) add_pages(nthreads);
+      }
+      neighptr = &(pages[npage][npnt]);
+      n = 0;
+
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+      itype = map[type[i]];
+      nC[i] = nH[i] = 0.0;
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+	j = jlist[jj];
+	j &= NEIGHMASK;
+	jtype = map[type[j]];
+	delx = xtmp - x[j][0];
+	dely = ytmp - x[j][1];
+	delz = ztmp - x[j][2];
+	rsq = delx*delx + dely*dely + delz*delz;
+
+	if (rsq < rcmaxsq[itype][jtype]) {
+	  neighptr[n++] = j;
+	  if (jtype == 0)
+	    nC[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+	  else
+	    nH[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+	}
+      }
+
+      REBO_firstneigh[i] = neighptr;
+      REBO_numneigh[i] = n;
+      npnt += n;
+
+      if (npnt >= pgsize)
+	error->one(FLERR,"REBO list overflow, boost neigh_modify one or page");
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairAIREBOOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairAIREBO::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_airebo_omp.h b/src/USER-OMP/pair_airebo_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..9ba598fa8fe60f7e89aeea04dcfe6483e5dddd91
--- /dev/null
+++ b/src/USER-OMP/pair_airebo_omp.h
@@ -0,0 +1,53 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(airebo/omp,PairAIREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_AIREBO_OMP_H
+#define LMP_PAIR_AIREBO_OMP_H
+
+#include "pair_airebo.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairAIREBOOMP : public PairAIREBO, public ThrOMP {
+
+ public:
+  PairAIREBOOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  double bondorder_thr(int i, int j, double rij[3], double rijmag,
+		       double VA, int vflag_atom, ThrData * const thr);
+  double bondorderLJ_thr(int i, int j, double rij[3], double rijmag,
+			 double VA, double rij0[3], double rijmag0,
+			 int vflag_atom, ThrData * const thr);
+
+  void FREBO_thr(int ifrom, int ito, int evflag, int eflag,
+		 int vflag_atom, ThrData * const thr);
+  void FLJ_thr(int ifrom, int ito, int evflag, int eflag,
+	       int vflag_atom, ThrData * const thr);
+  void TORSION_thr(int ifrom, int ito, int evflag, int eflag, ThrData * const thr);
+  void REBO_neigh_thr();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..80607b7225ba6e3826b12366e07cec6420b0f4f6
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.cpp
@@ -0,0 +1,647 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_comb_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "group.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCombOMP::PairCombOMP(LAMMPS *lmp) :
+  PairComb(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCombOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow coordination array if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(NCo);
+    memory->destroy(bbij);
+    nmax = atom->nmax;
+    memory->create(NCo,nmax,"pair:NCo");
+    memory->create(bbij,nmax,nmax,"pair:bbij");
+  }
+
+  // Build short range neighbor list
+  Short_neigh_thr();
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else eval<0,0,0>(ifrom, ito, thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairCombOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,k,ii,jj,kk,jnum,iparam_i;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int mr1,mr2,mr3;
+  int rsc,inty;
+  double elp_ij,filp[3],fjlp[3],fklp[3];
+  double iq,jq; 
+  double yaself;
+  double potal,fac11,fac11e;
+  double vionij,fvionij,sr1,sr2,sr3,Eov,Fov;
+  int sht_jnum, *sht_jlist;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  yaself = vionij = fvionij = Eov = Fov = 0.0; 
+
+  double fxtmp,fytmp,fztmp;
+  double fjxtmp,fjytmp,fjztmp;
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    iq = q[i];
+    NCo[i] = 0;  
+    iparam_i = elem2param[itype][itype][itype];
+
+    // self energy, only on i atom
+
+    yaself = self(&params[iparam_i],iq,potal);
+
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself,
+			     0.0,0.0,0.0,0.0,0.0,thr);
+
+    // two-body interactions (long and short repulsive)
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    sht_jlist = sht_first[i];
+    sht_jnum = sht_num[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      // Qj calculates 2-body Coulombic 
+
+      jtype = map[type[j]];
+      jq = q[j];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // long range q-dependent
+
+      if (rsq > params[iparam_ij].lcutsq) continue;
+
+      inty = intype[itype][jtype];
+
+      // polynomial three-point interpolation
+
+      tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype);
+
+      // 1/r energy and forces
+
+      direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq,
+	     potal,fac11,fac11e,vionij,fvionij);
+
+      // field correction to self energy
+
+      field(&params[iparam_ij],rsq,iq,jq,vionij,fvionij);
+
+      // polarization field
+      // sums up long range forces
+
+      fxtmp += delx*fvionij;
+      fytmp += dely*fvionij;
+      fztmp += delz*fvionij;
+      f[j][0] -= delx*fvionij;
+      f[j][1] -= dely*fvionij;
+      f[j][2] -= delz*fvionij;
+
+      if (EVFLAG) 
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     0.0,vionij,fvionij,delx,dely,delz,thr);
+
+      // short range q-independent
+
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq);
+
+      // repulsion is pure two-body, sums up pair repulsive forces
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG)
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+
+    // accumulate coordination number information
+
+    if (cor_flag) {
+      int numcoor = 0;
+      for (jj = 0; jj < sht_jnum; jj++) {
+        j = sht_jlist[jj];
+	j &= NEIGHMASK;
+	jtype = map[type[j]];
+	iparam_ij = elem2param[itype][jtype][jtype];
+	
+	if(params[iparam_ij].hfocor > 0.0 ) {
+	  delr1[0] = x[j][0] - xtmp;
+	  delr1[1] = x[j][1] - ytmp;
+	  delr1[2] = x[j][2] - ztmp;
+	  rsq1 = vec3_dot(delr1,delr1);
+	  
+	  if (rsq1 > params[iparam_ij].cutsq) continue;
+	  ++numcoor;
+	}
+      }
+      NCo[i] = numcoor; 
+    }
+
+    // three-body interactions 
+    // half i-j loop
+
+    for (jj = 0; jj < sht_jnum; jj++) {
+      j = sht_jlist[jj];
+      j &= NEIGHMASK;
+
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // this Qj for q-dependent BSi
+
+      jq = q[j];
+
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = vec3_dot(delr1,delr1);
+
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+      cuo_flag1 = 0; cuo_flag2 = 0;
+
+      for (kk = 0; kk < sht_jnum; kk++) {
+	k = sht_jlist[kk];
+	if (j == k) continue;
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+
+	if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1;
+	if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1;
+      }
+
+      if (cuo_flag1 && cuo_flag2) cuo_flag = 1;
+      else cuo_flag = 0;
+
+      force_zeta(&params[iparam_ij],EFLAG,i,j,rsq1,zeta_ij,
+		 iq,jq,fpair,prefactor,evdwl);
+
+      // over-coordination correction for HfO2
+
+      if (cor_flag && NCo[i] != 0)
+	Over_cor(&params[iparam_ij],rsq1,NCo[i],Eov, Fov);
+      evdwl +=  Eov;
+      fpair +=  Fov;
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],thr);
+
+      // attractive term via loop over k (3-body forces)
+
+      for (kk = 0; kk < sht_jnum; kk++) {
+	k = sht_jlist[kk];
+	if (j == k) continue;
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	for (rsc = 0; rsc < 3; rsc++)
+	  fi[rsc] = fj[rsc] = fk[rsc] = 0.0;
+
+	attractive(&params[iparam_ijk],prefactor,
+		   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+	// 3-body LP and BB correction and forces
+
+	elp_ij = elp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+	flp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp); 
+
+	fxtmp += fi[0] + filp[0];
+	fytmp += fi[1] + filp[1];
+	fztmp += fi[2] + filp[2];
+	fjxtmp += fj[0] + fjlp[0];
+	fjytmp += fj[1] + fjlp[1];
+	fjztmp += fj[2] + fjlp[2];
+	f[k][0] += fk[0] + fklp[0];
+	f[k][1] += fk[1] + fklp[1];
+	f[k][2] += fk[2] + fklp[2];
+
+        if (EVFLAG) 
+	  ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		       elp_ij,0.0,0.0,0.0,0.0,0.0, thr);
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+
+    if (cuo_flag) params[iparam_i].cutsq *= 0.65;
+  }
+  cuo_flag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
+{
+  int ii;
+  double potal,fac11,fac11e;
+
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int * const tag = atom->tag;
+
+  const int inum = list->inum;
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  const int * const mask = atom->mask;
+  const int groupbit = group->bitmask[igroup];
+
+  qf = qf_fix;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      qf[i] = 0.0;
+  }
+
+  // communicating charge force to all nodes, first forward then reverse
+
+  comm->forward_comm_pair(this);
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+#if defined(_OPENMP)
+#pragma omp parallel for private(ii) default(none) shared(potal,fac11e)
+#endif
+  for (ii = 0; ii < inum; ii ++) {
+    double fqi,fqj,fqij,fqji,fqjj,delr1[3];
+    double sr1,sr2,sr3;
+    int mr1,mr2,mr3;
+
+    const int i = ilist[ii];
+    const int itag = tag[i];
+
+    if (mask[i] & groupbit) {
+      fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed.
+      int itype = map[type[i]];
+      const double xtmp = x[i][0];
+      const double ytmp = x[i][1];
+      const double ztmp = x[i][2];
+      const double iq = q[i];
+      const int iparam_i = elem2param[itype][itype][itype];
+
+      // charge force from self energy
+
+      fqi = qfo_self(&params[iparam_i],iq,potal);
+
+      // two-body interactions
+
+      const int * const jlist = firstneigh[i];
+      const int jnum = numneigh[i];
+
+      for (int jj = 0; jj < jnum; jj++) {
+        const int j = jlist[jj] & NEIGHMASK;
+	const int jtag = tag[j];	
+
+        if (itag > jtag) {
+          if ((itag+jtag) % 2 == 0) continue;
+        } else if (itag < jtag) {
+          if ((itag+jtag) % 2 == 1) continue;
+        } else {
+          if (x[j][2] < ytmp) continue;
+          if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+          if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+        } 
+
+        const int jtype = map[type[j]];
+        double jq = q[j];
+
+        delr1[0] = x[j][0] - xtmp;
+        delr1[1] = x[j][1] - ytmp;
+        delr1[2] = x[j][2] - ztmp;
+        double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        // long range q-dependent
+
+        if (rsq1 > params[iparam_ij].lcutsq) continue;
+
+        const int inty = intype[itype][jtype];
+
+        // polynomial three-point interpolation
+
+        tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype);
+
+        // 1/r charge forces
+
+        qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij);
+
+        // field correction to self energy and charge force
+
+        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
+        fqi   += jq * fqij + fqji;
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+        qf[j] += (iq * fqij + fqjj);
+      }
+      
+        // three-body interactions
+	
+      for (int jj = 0; jj < jnum; jj++) {
+	const int j = jlist[jj] & NEIGHMASK;
+	const int jtype = map[type[j]];
+	const double jq = q[j];
+
+	delr1[0] = x[j][0] - xtmp;
+	delr1[1] = x[j][1] - ytmp;
+	delr1[2] = x[j][2] - ztmp;
+	double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        if (rsq1 > params[iparam_ij].cutsq) continue;
+
+        // charge force in Aij and Bij
+
+        qfo_short(&params[iparam_ij],i,j,rsq1,iq,jq,fqij,fqjj);
+        fqi += fqij;  
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+	qf[j] += fqjj;
+      }
+
+#if defined(_OPENMP) && 0
+#pragma omp atomic
+#endif
+      qf[i] += fqi;
+    }
+  }
+
+  comm->reverse_comm_pair(this);
+
+  // sum charge force on each node and return it
+
+  double eneg = 0.0;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      eneg += qf[i];
+  }
+  double enegtot;
+  MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
+  return enegtot;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairComb::memory_usage();
+
+  return bytes;
+}
+/* ---------------------------------------------------------------------- */
+
+void PairCombOMP::Short_neigh_thr()
+{
+
+  if (atom->nmax > nmax) {
+    nmax = int(1.0 * atom->nmax);
+    memory->sfree(sht_num);
+    memory->sfree(sht_first);
+    memory->create(sht_num,nmax,"pair:sht_num");
+    sht_first = (int **) memory->smalloc(nmax*sizeof(int *),
+	    "pair:sht_first");
+  }
+
+  const int nthreads = comm->nthreads;
+  if (nthreads > maxpage)
+    add_pages(nthreads - maxpage);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int nj,npntj,*neighptrj,itype,jtype;
+    int iparam_ij,*ilist,*jlist,*numneigh,**firstneigh;
+    int jnum,i,j,ii,jj;
+    double xtmp,ytmp,ztmp,rsq,delrj[3];
+    double **x = atom->x;
+    int *type  = atom->type;
+
+    const int inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    const int iidelta = 1 + inum/nthreads;
+    const int iifrom = tid*iidelta;
+    int iito   = iifrom + iidelta;
+    if (iito > inum) iito = inum;
+
+    int npage = tid;
+    npntj = 0;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      if(pgsize - npntj < oneatom) {
+	npntj = 0;
+	npage++;
+	if (npage == maxpage) add_pages(nthreads);
+      }
+ 
+      neighptrj = &pages[npage][npntj];
+      nj = 0;
+
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+	j = jlist[jj];
+	j &= NEIGHMASK;
+	jtype = type[j];
+	iparam_ij = elem2param[itype][jtype][jtype];
+
+	delrj[0] = xtmp - x[j][0];
+	delrj[1] = ytmp - x[j][1];
+	delrj[2] = ztmp - x[j][2];
+	rsq = vec3_dot(delrj,delrj);
+      
+	if (rsq > cutmin) continue;
+	neighptrj[nj++] = j;
+      }
+      sht_first[i] = neighptrj;
+      sht_num[i] = nj;
+      npntj += nj;
+    }
+  }
+}
diff --git a/src/USER-OMP/pair_comb_omp.h b/src/USER-OMP/pair_comb_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..85011064e733a3146b0fe6d1c40389d7e25311f1
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.h
@@ -0,0 +1,47 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(comb/omp,PairCombOMP)
+
+#else
+
+#ifndef LMP_PAIR_COMB_OMP_H
+#define LMP_PAIR_COMB_OMP_H
+
+#include "pair_comb.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCombOMP : public PairComb, public ThrOMP {
+
+ public:
+  PairCombOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+  virtual double yasu_char(double *, int &);
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  void Short_neigh_thr();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e1e8690245b4d7cf2f1c1bff4452f49e3e0a11fa
--- /dev/null
+++ b/src/USER-OMP/pair_coul_diel_omp.cpp
@@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_coul_diel_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulDielOMP::PairCoulDielOMP(LAMMPS *lmp) :
+  PairCoulDiel(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulDielOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairCoulDielOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,r,rarg,th,depsdr,epsr,forcecoul,factor_coul;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r = sqrt(rsq);
+	rarg = (r-rme[itype][jtype])/sigmae[itype][jtype];
+	th=tanh(rarg);
+	epsr=a_eps+b_eps*th;
+	depsdr=b_eps * (1.0 - th*th) / sigmae[itype][jtype];
+	
+	forcecoul = qqrd2e*qtmp*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq;
+	fpair = factor_coul*forcecoul/r;
+	
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  ecoul = (qqrd2e*qtmp*q[j]*((eps_s/epsr) -1.)/r) - offset[itype][jtype];
+	  ecoul *= factor_coul;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCoulDielOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairCoulDiel::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_coul_diel_omp.h b/src/USER-OMP/pair_coul_diel_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..2dc0083ae33a818a3646387359edc842d6916711
--- /dev/null
+++ b/src/USER-OMP/pair_coul_diel_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/diel/omp,PairCoulDielOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_DIEL_OMP_H
+#define LMP_PAIR_COUL_DIEL_OMP_H
+
+#include "pair_coul_diel.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulDielOMP : public PairCoulDiel, public ThrOMP {
+
+ public:
+  PairCoulDielOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5499fab3bc6fd47a09f0bd47618480526d743282
--- /dev/null
+++ b/src/USER-OMP/pair_gauss_cut_omp.cpp
@@ -0,0 +1,156 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gauss_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGaussCutOMP::PairGaussCutOMP(LAMMPS *lmp) :
+  PairGaussCut(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGaussCutOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairGaussCutOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r,rexp,ugauss,factor_lj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r = sqrt(rsq);
+	rexp = (r-rmh[itype][jtype])/sigmah[itype][jtype];
+	ugauss = pgauss[itype][jtype]*exp(-0.5*rexp*rexp); 
+	fpair = factor_lj*rexp/r*ugauss/sigmah[itype][jtype];
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  evdwl = ugauss - offset[itype][jtype];
+	  evdwl *= factor_lj;
+	}
+
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGaussCutOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairGaussCut::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_gauss_cut_omp.h b/src/USER-OMP/pair_gauss_cut_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3da26abede0b988bf7b74aba151fc302f76277d
--- /dev/null
+++ b/src/USER-OMP/pair_gauss_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gauss/cut/omp,PairGaussCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_GAUSS_CUT_OMP_H
+#define LMP_PAIR_GAUSS_CUT_OMP_H
+
+#include "pair_gauss_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGaussCutOMP : public PairGaussCut, public ThrOMP {
+
+ public:
+  PairGaussCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_line_lj_omp.cpp b/src/USER-OMP/pair_line_lj_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..59982735d8c7aed9f39c4dcc519dbd24d9039965
--- /dev/null
+++ b/src/USER-OMP/pair_line_lj_omp.cpp
@@ -0,0 +1,339 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_line_lj_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairLineLJOMP::PairLineLJOMP(LAMMPS *lmp) :
+  PairLineLJ(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLineLJOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int * const line = atom->line;
+  const int * const type = atom->type;
+
+  // grow discrete list if necessary and initialize
+
+  if (nall > nmax) {
+    nmax = nall;
+    memory->destroy(dnum);
+    memory->destroy(dfirst);
+    memory->create(dnum,nall,"pair:dnum");
+    memory->create(dfirst,nall,"pair:dfirst");
+  }
+  memset(dnum,0,nall*sizeof(int));
+  ndiscrete = 0;
+
+  // need to discretize the system ahead of time
+  // until we find a good way to multi-thread it.
+  for (int i = 0; i < nall; ++i)
+    if (line[i] >= 0)
+      if (dnum[i] == 0)
+	discretize(i,sigma[type[i]][type[i]]);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLineLJOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  int ni,nj,npi,npj,ifirst,jfirst;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
+  double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj,ti,tj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const line = atom->line;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      
+      if (rsq >= cutsq[itype][jtype]) continue;
+      
+      // line/line interactions = NxN particles
+      
+      evdwl = 0.0;
+      if (line[i] >= 0 && line[j] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+
+	  for (nj = 0; nj < npj; nj++) {
+	    dxj = discrete[jfirst+nj].dx;
+	    dyj = discrete[jfirst+nj].dy;
+
+	    xi[0] = x[i][0] + dxi;
+	    xi[1] = x[i][1] + dyi;
+	    xj[0] = x[j][0] + dxj;
+	    xj[1] = x[j][1] + dyj;
+
+	    delx = xi[0] - xj[0];
+	    dely = xi[1] - xj[1];
+	    rsq = delx*delx + dely*dely;
+
+	    sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
+	    sig3 = sig*sig*sig;
+	    term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	    term1 = 2.0 * term2 * sig3*sig3;
+	    r2inv = 1.0/rsq;
+	    r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv * (term1*r6inv - term2);
+	    fpair = forcelj*r2inv;
+
+	    if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	    fi[0] += delx*fpair;
+	    fi[1] += dely*fpair;
+	    ti += fpair*(dxi*dely - dyi*delx);
+
+	    if (NEWTON_PAIR || j < nlocal) {
+	      fj[0] -= delx*fpair;
+	      fj[1] -= dely*fpair;
+	      tj += fpair*(dxj*dely - dyj*delx);
+	    }
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // line/particle interaction = Nx1 particles
+      // convert line into Np particles based on sigma and line length
+
+      } else if (line[i] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+
+	  xi[0] = x[i][0] + dxi;
+	  xi[1] = x[i][1] + dyi;
+	  xj[0] = x[j][0];
+	  xj[1] = x[j][1];
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  rsq = delx*delx + dely*dely;
+
+	  sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  ti += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    tj += fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // particle/line interaction = Nx1 particles
+      // convert line into Np particles based on sigma and line length
+
+      } else if (line[j] >= 0) {
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (nj = 0; nj < npj; nj++) {
+	  dxj = discrete[jfirst+nj].dx;
+	  dyj = discrete[jfirst+nj].dy;
+
+	  xi[0] = x[i][0];
+	  xi[1] = x[i][1];
+	  xj[0] = x[j][0] + dxj;
+	  xj[1] = x[j][1] + dyj;
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  rsq = delx*delx + dely*dely;
+
+	  sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+	  
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  ti += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    tj -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // particle/particle interaction = 1x1 particles
+
+      } else {
+	r2inv = 1.0/rsq;
+	r6inv = r2inv*r2inv*r2inv;
+	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	fpair = forcelj*r2inv;
+
+	if (EFLAG)
+	  evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+      }
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLineLJOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLineLJ::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_line_lj_omp.h b/src/USER-OMP/pair_line_lj_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..93b38afaf0880b213e9835ee5b3dc2b34c7ed169
--- /dev/null
+++ b/src/USER-OMP/pair_line_lj_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(line/lj/omp,PairLineLJOMP)
+
+#else
+
+#ifndef LMP_PAIR_LINE_LJ_OMP_H
+#define LMP_PAIR_LINE_LJ_OMP_H
+
+#include "pair_line_lj.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLineLJOMP : public PairLineLJ, public ThrOMP {
+
+ public:
+  PairLineLJOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3b12668549718b7a5550e5c8ed44191ae412184a
--- /dev/null
+++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp
@@ -0,0 +1,260 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_charmm_coul_pppm_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCharmmCoulPPPMOMP::PairLJCharmmCoulPPPMOMP(LAMMPS *lmp) :
+  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCharmmCoulPPPMOMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCharmmCoulLong::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCharmmCoulPPPMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCharmmCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  double philj,switch1,switch2;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  jtype = type[j];
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  if (rsq > cut_lj_innersq) {
+	    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+	      (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+	    switch2 = 12.0*rsq * (cut_ljsq-rsq) * 
+	      (rsq-cut_lj_innersq) / denom_lj;
+	    philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
+	    forcelj = forcelj*switch1 + philj*switch2;
+	  }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+	    if (rsq > cut_lj_innersq) {
+	      switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+		(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+	      evdwl *= switch1;
+	    }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+	
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCharmmCoulPPPMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCharmmCoulLong::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..1faee2c47980b74d86bd00e9d75b987dae3730f1
--- /dev/null
+++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h
@@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/charmm/coul/pppm/omp,PairLJCharmmCoulPPPMOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H
+#define LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H
+
+#include "pair_lj_charmm_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCharmmCoulPPPMOMP : public PairLJCharmmCoulLong, public ThrOMP {
+
+ public:
+  PairLJCharmmCoulPPPMOMP(class LAMMPS *);
+  virtual ~PairLJCharmmCoulPPPMOMP() {};
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c048db9d4c098f7357de97bdc39373d3430ef335
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp
@@ -0,0 +1,245 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_pppm_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMOMP::PairLJCutCoulPPPMOMP(LAMMPS *lmp) :
+  PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMOMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCutCoulLong::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCutCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulPPPMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCutCoulLong::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..010b7818b22f4a763a6038e9605fcea74269a6ba
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h
@@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/pppm/omp,PairLJCutCoulPPPMOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H
+
+#include "pair_lj_cut_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulPPPMOMP : public PairLJCutCoulLong, public ThrOMP {
+
+ public:
+  PairLJCutCoulPPPMOMP(class LAMMPS *);
+  virtual ~PairLJCutCoulPPPMOMP() {};
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..add0e295e24b554984e8b41e1bdfd3e006f326eb
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp
@@ -0,0 +1,487 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_pppm_tip4p_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMTIP4POMP::PairLJCutCoulPPPMTIP4POMP(LAMMPS *lmp) :
+  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+
+  // for caching m-shift corrected positions
+  maxmpos = 0;
+  h1idx = h2idx = NULL;
+  mpos = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMTIP4POMP::~PairLJCutCoulPPPMTIP4POMP()
+{
+  memory->destroy(h1idx);
+  memory->destroy(h2idx);
+  memory->destroy(mpos);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/tip4p/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/tip4p/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCutCoulLongTIP4P::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+
+  // reallocate per-atom arrays, if necessary
+  if (nall > maxmpos) {
+    maxmpos = nall;
+    memory->grow(mpos,maxmpos,3,"pair:mpos");
+    memory->grow(h1idx,maxmpos,"pair:h1idx");
+    memory->grow(h2idx,maxmpos,"pair:h2idx");
+  }
+
+  // cache corrected M positions in mpos[]
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
+  for (int i = 0; i < nlocal; i++) {
+    if (type[i] == typeO) {
+      find_M(i,h1idx[i],h2idx[i],mpos[i]);
+    } else {
+      mpos[i][0] = x[i][0];
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+  for (int i = nlocal; i < nall; i++) {
+    if (type[i] == typeO) {
+      find_M_permissive(i,h1idx[i],h2idx[i],mpos[i]);
+    } else {
+      mpos[i][0] = x[i][0];
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (vflag) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (vflag) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int VFLAG>
+void PairLJCutCoulPPPMTIP4POMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  int n,vlist[6];
+  int iH1,iH2,jH1,jH2;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
+  double fraction,table;
+  double delxOM,delyOM,delzOM;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce;
+  double factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc,ddotf;
+  double v[6],xH1[3],xH2[3];
+  double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz;
+  double *x1,*x2;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+    x1 = mpos[i];
+    iH1 = h1idx[i];
+    iH2 = h2idx[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+	r2inv = 1.0/rsq;
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj * r2inv;
+
+	  fxtmp += delx*forcelj;
+	  fytmp += dely*forcelj;
+	  fztmp += delz*forcelj;
+	  f[j][0] -= delx*forcelj;
+	  f[j][1] -= dely*forcelj;
+	  f[j][2] -= delz*forcelj;
+
+	  if (EFLAG) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+
+	  if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
+				   evdwl,0.0,forcelj,delx,dely,delz,thr);
+	}
+
+	// adjust rsq and delxyz for off-site O charge(s)
+
+	if (itype == typeO || jtype == typeO) { 
+	  x2 = mpos[j];
+	  jH1 = h1idx[j];
+	  jH2 = h2idx[j];
+	  if (jtype == typeO  && ( jH1 < 0 || jH2 < 0 ))
+	    error->one(FLERR,"TIP4P hydrogen is missing");
+	  delx = x1[0] - x2[0];
+	  dely = x1[1] - x2[1];
+	  delz = x1[2] - x2[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+	}
+
+	// test current rsq against cutoff and compute Coulombic force
+
+	if (rsq < cut_coulsq) {
+	  r2inv = 1 / rsq;
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) {
+	      forcecoul -= (1.0-factor_coul)*prefactor; 
+	    }
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+
+	  cforce = forcecoul * r2inv;
+
+	  // if i,j are not O atoms, force is applied directly
+	  // if i or j are O atoms, force is on fictitious atom & partitioned
+	  // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
+	  // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
+	  // preserves total force and torque on water molecule
+	  // virial = sum(r x F) where each water's atoms are near xi and xj
+	  // vlist stores 2,4,6 atoms whose forces contribute to virial
+
+	  n = 0;
+
+	  if (itype != typeO) {
+	    fxtmp += delx * cforce;
+	    fytmp += dely * cforce;
+	    fztmp += delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] = x[i][0] * delx * cforce;
+	      v[1] = x[i][1] * dely * cforce;
+	      v[2] = x[i][2] * delz * cforce;
+	      v[3] = x[i][0] * dely * cforce;
+	      v[4] = x[i][0] * delz * cforce;
+	      v[5] = x[i][1] * delz * cforce;
+	      vlist[n++] = i;
+	    }
+
+	  } else {
+
+            fdx = delx*cforce;
+            fdy = dely*cforce;
+            fdz = delz*cforce;
+
+            delxOM = x[i][0] - x1[0];
+            delyOM = x[i][1] - x1[1];
+            delzOM = x[i][2] - x1[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+            fxtmp += fOx;
+            fytmp += fOy;
+            fztmp += fOz;
+
+            f[iH1][0] += fHx;
+            f[iH1][1] += fHy;
+            f[iH1][2] += fHz;
+
+            f[iH2][0] += fHx;
+            f[iH2][1] += fHy;
+            f[iH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[i],x[iH1],xH1);
+	      domain->closest_image(x[i],x[iH2],xH2);
+
+	      v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = i;
+	      vlist[n++] = iH1;
+	      vlist[n++] = iH2;
+	    }
+	  }
+
+	  if (jtype != typeO) {
+	    f[j][0] -= delx * cforce;
+	    f[j][1] -= dely * cforce;
+	    f[j][2] -= delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] -= x[j][0] * delx * cforce;
+	      v[1] -= x[j][1] * dely * cforce;
+	      v[2] -= x[j][2] * delz * cforce;
+	      v[3] -= x[j][0] * dely * cforce;
+	      v[4] -= x[j][0] * delz * cforce;
+	      v[5] -= x[j][1] * delz * cforce;
+	      vlist[n++] = j;
+	    }
+
+	  } else {
+
+	    fdx = -delx*cforce;
+	    fdy = -dely*cforce;
+	    fdz = -delz*cforce;
+
+	    delxOM = x[j][0] - x2[0];
+	    delyOM = x[j][1] - x2[1];
+	    delzOM = x[j][2] - x2[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+	    f[j][0] += fOx;
+	    f[j][1] += fOy;
+	    f[j][2] += fOz;
+
+            f[jH1][0] += fHx;
+            f[jH1][1] += fHy;
+            f[jH1][2] += fHz;
+
+            f[jH2][0] += fHx;
+            f[jH2][1] += fHy;
+            f[jH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[j],x[jH1],xH1);
+	      domain->closest_image(x[j],x[jH2],xH2);
+
+	      v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = j;
+	      vlist[n++] = jH1;
+	      vlist[n++] = jH2;
+	    }
+	  }
+
+	  if (EFLAG) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr);
+	}
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::find_M_permissive(int i, int &iH1, int &iH2, double *xM)
+{
+  // test that O is correctly bonded to 2 succesive H atoms
+
+   iH1 = atom->map(atom->tag[i] + 1);
+   iH2 = atom->map(atom->tag[i] + 2);
+
+   if (iH1 == -1 || iH2 == -1)
+      return;
+   else
+      find_M(i,iH1,iH2,xM);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulPPPMTIP4POMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCutCoulLongTIP4P::memory_usage();
+  bytes += 2 * maxmpos * sizeof(int);
+  bytes += 3 * maxmpos * sizeof(double);
+  bytes += maxmpos * sizeof(double *);
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..71da06eb9b597fd2001585a7170aa2ee3d0a032b
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/pppm/tip4p/omp,PairLJCutCoulPPPMTIP4POMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H
+
+#include "pair_lj_cut_coul_long_tip4p.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulPPPMTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
+
+ public:
+  PairLJCutCoulPPPMTIP4POMP(class LAMMPS *);
+  virtual ~PairLJCutCoulPPPMTIP4POMP();
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  // this is to cache m-shift corrected positions.
+  int maxmpos;        // size of the following arrays
+  int *h1idx, *h2idx; // local index of hydrogen atoms
+  double **mpos;      // coordinates corrected for m-shift.
+  void find_M_permissive(int, int &, int &, double *);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0e120a8639c91a3b17d0e11d0d2b299e03ef0724
--- /dev/null
+++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
@@ -0,0 +1,248 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_sdk_coul_long_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace LJSDKParms;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJSDKCoulLongOMP::PairLJSDKCoulLongOMP(LAMMPS *lmp) :
+  PairLJSDKCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJSDKCoulLongOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr);
+	else eval_thr<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr);
+	else eval_thr<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr);
+      else eval_thr<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJSDKCoulLongOMP::eval_thr(int iifrom, int iito, ThrData * const thr)
+{
+  int i,ii,j,jj,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp=fytmp=fztmp=0.0;
+
+    const int itype = type[i];
+    const int * const jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	const int ljt = lj_type[itype][jtype];
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else {
+	  forcecoul = 0.0;
+	  ecoul = 0.0;
+	}
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+
+	  if (ljt == LJ12_4) {
+	    const double r4inv=r2inv*r2inv;
+	    forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
+			     - lj2[itype][jtype]);
+
+	    if (EFLAG)
+	      evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+	  
+	  } else if (ljt == LJ9_6) {
+	    const double r3inv = r2inv*sqrt(r2inv);
+	    const double r6inv = r3inv*r3inv;
+	    forcelj = r6inv*(lj1[itype][jtype]*r3inv
+			     - lj2[itype][jtype]);
+	    if (EFLAG)
+	      evdwl = r6inv*(lj3[itype][jtype]*r3inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+
+	  } else if (ljt == LJ12_6) {
+	    const double r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv*(lj1[itype][jtype]*r6inv
+			     - lj2[itype][jtype]);
+	    if (EFLAG)
+	      evdwl = r6inv*(lj3[itype][jtype]*r6inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+	  }
+	} else {
+	  forcelj=0.0;
+	  evdwl = 0.0;
+	}
+
+	fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJSDKCoulLongOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJSDKCoulLong::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..01912535b1d4282dcd81049fe7623fac7444f212
--- /dev/null
+++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h
@@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/sdk/coul/long/omp,PairLJSDKCoulLongOMP)
+PairStyle(cg/cmm/coul/long/omp,PairLJSDKCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H
+#define LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H
+
+#include "pair_lj_sdk_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJSDKCoulLongOMP : public PairLJSDKCoulLong, public ThrOMP {
+
+ public:
+  PairLJSDKCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval_thr(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc5faafad54c1b68d33c8116cf591b8d09bcba0e
--- /dev/null
+++ b/src/USER-OMP/pair_lj_sdk_omp.cpp
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+   This style is a simplified re-implementation of the CG/CMM pair style
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_sdk_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace LJSDKParms;
+
+/* ---------------------------------------------------------------------- */
+
+PairLJSDKOMP::PairLJSDKOMP(LAMMPS *lmp) :
+  PairLJSDK(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJSDKOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr);
+	else eval_thr<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr);
+	else eval_thr<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr);
+      else eval_thr<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJSDKOMP::eval_thr(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,forcelj,factor_lj;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp=fytmp=fztmp=0.0;
+
+    const int itype = type[i];
+    const int * const jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	const int ljt = lj_type[itype][jtype];
+
+	if (ljt == LJ12_4) {
+	  const double r4inv=r2inv*r2inv;
+	  forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
+			   - lj2[itype][jtype]);
+
+	  if (EFLAG)
+	    evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+	  
+	} else if (ljt == LJ9_6) {
+	  const double r3inv = r2inv*sqrt(r2inv);
+	  const double r6inv = r3inv*r3inv;
+	  forcelj = r6inv*(lj1[itype][jtype]*r3inv
+			   - lj2[itype][jtype]);
+	  if (EFLAG)
+	    evdwl = r6inv*(lj3[itype][jtype]*r3inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+
+	} else if (ljt == LJ12_6) {
+	  const double r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv*(lj1[itype][jtype]*r6inv
+			  - lj2[itype][jtype]);
+	  if (EFLAG)
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+	} else continue;
+
+	fpair = factor_lj*forcelj*r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) evdwl *= factor_lj;
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJSDKOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJSDK::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_sdk_omp.h b/src/USER-OMP/pair_lj_sdk_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..f2c9e5ff1fd6521b476f50fcf3bd8d0903d7d615
--- /dev/null
+++ b/src/USER-OMP/pair_lj_sdk_omp.h
@@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/sdk/omp,PairLJSDKOMP)
+PairStyle(cg/cmm/omp,PairLJSDKOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SDK_OMP_H
+#define LMP_PAIR_LJ_SDK_OMP_H
+
+#include "pair_lj_sdk.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJSDKOMP : public PairLJSDK, public ThrOMP {
+
+ public:
+  PairLJSDKOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval_thr(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_rebo_omp.cpp b/src/USER-OMP/pair_rebo_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..70b5c4e8ae111fe1a2bca7620144b57eff679978
--- /dev/null
+++ b/src/USER-OMP/pair_rebo_omp.cpp
@@ -0,0 +1,33 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "pair_rebo_omp.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairREBOOMP::settings(int narg, char **arg)
+{
+  if (narg != 0) error->all(FLERR,"Illegal pair_style command");
+
+  cutlj = 0.0;
+  ljflag = torflag = 0;
+}
diff --git a/src/USER-OMP/pair_rebo_omp.h b/src/USER-OMP/pair_rebo_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..4606e56ae1c6d538bd26605ba13aa9b7c8c0b1f7
--- /dev/null
+++ b/src/USER-OMP/pair_rebo_omp.h
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(rebo/omp,PairREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_REBO_OMP_H
+#define LMP_PAIR_REBO_OMP_H
+
+#include "pair_airebo_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairREBOOMP : public PairAIREBOOMP {
+ public:
+  PairREBOOMP(class LAMMPS *);
+  virtual void settings(int, char **);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c9a680f0964539d06baad8dd9a6136bc9914038
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_table_omp.cpp
@@ -0,0 +1,496 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tersoff_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) :
+  PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffTableOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag)
+      if (vflag_atom) eval<1,1>(ifrom, ito, thr);
+      else eval<1,0>(ifrom, ito, thr);
+    else eval<0,0>(ifrom, ito, thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int VFLAG_ATOM>
+void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,k,ii,inum,jnum;
+  int itype,jtype,ktype,ijparam,ikparam,ijkparam;
+  double xtmp,ytmp,ztmp;
+  double fxtmp,fytmp,fztmp;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  int interpolIDX;
+  double r_ik_x, r_ik_y, r_ik_z;
+  double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
+  double invR_ij, invR_ik, cosTeta;
+  double repulsivePotential, attractivePotential;
+  double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
+  double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta; 
+  double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
+  double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
+  double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
+  double factor_force_ij;
+  double gtetaFunctionDerived_temp,gtetaFunction_temp;
+
+  double evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over full neighbor list of my atoms
+  for (ii = iifrom; ii < iito; ii++) {
+
+    i = ilist[ii];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    // Pre-calculate gteta and cutoff function
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+
+      double dr_ij[3], r_ij;
+
+      j = jlist[neighbor_j];
+      j &= NEIGHMASK;
+
+      dr_ij[0] = xtmp - x[j][0];
+      dr_ij[1] = ytmp - x[j][1];
+      dr_ij[2] = ztmp - x[j][2];
+      r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+
+      if (r_ij > params[ijparam].cutsq) continue;
+
+      r_ij = sqrt(r_ij);
+
+      invR_ij = 1.0 / r_ij;
+      
+      directorCos_ij_x = invR_ij * dr_ij[0];
+      directorCos_ij_y = invR_ij * dr_ij[1];
+      directorCos_ij_z = invR_ij * dr_ij[2];
+      
+      // preCutoffFunction
+      interpolDeltaX =  r_ij - GRIDSTART;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
+      interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
+      preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+      // preCutoffFunctionDerived
+      interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
+      interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
+      preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+
+
+      for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
+          
+	// preGtetaFunction
+	interpolDeltaX=cosTeta+1.0;
+	interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
+	interpolIDX = (int) interpolTMP;
+	interpolY1 = gtetaFunction[itype][interpolIDX];
+	interpolY2 = gtetaFunction[itype][interpolIDX+1];
+	gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+	// preGtetaFunctionDerived
+	interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
+	interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
+	gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+          
+	preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
+	preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
+	preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
+	preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
+
+      } // loop on K
+
+    } // loop on J
+
+
+    // loop over neighbours of atom i
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+
+      double dr_ij[3], r_ij, f_ij[3];
+
+      j = jlist[neighbor_j];
+      j &= NEIGHMASK;
+
+      dr_ij[0] = xtmp - x[j][0];
+      dr_ij[1] = ytmp - x[j][1];
+      dr_ij[2] = ztmp - x[j][2];
+      r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+
+      if (r_ij > params[ijparam].cutsq) continue;
+
+      r_ij = sqrt(r_ij);
+      invR_ij = 1.0 / r_ij;
+      
+      directorCos_ij_x = invR_ij * dr_ij[0];
+      directorCos_ij_y = invR_ij * dr_ij[1];
+      directorCos_ij_z = invR_ij * dr_ij[2];
+
+      exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
+      exponentAttractivePotential = params[ijparam].lam2 * r_ij;
+
+      // repulsiveExponential
+      interpolDeltaX =  exponentRepulsivePotential - minArgumentExponential;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = exponential[interpolIDX];
+      interpolY2 = exponential[interpolIDX+1];
+      repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+      // attractiveExponential
+      interpolDeltaX =  exponentAttractivePotential - minArgumentExponential;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = exponential[interpolIDX];
+      interpolY2 = exponential[interpolIDX+1];
+      attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+
+      repulsivePotential = params[ijparam].biga * repulsiveExponential;
+      attractivePotential = -params[ijparam].bigb * attractiveExponential;
+
+      cutoffFunctionIJ = preCutoffFunction[neighbor_j];
+      cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j];
+
+      zeta = 0.0;
+            
+      // first loop over neighbours of atom i except j - part 1/2
+      for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * r_ik_x;
+	directorCos_ik_y = invR_ik * r_ik_y;
+	directorCos_ik_z = invR_ik * r_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	zeta += cutoffFunctionIK * gtetaFunctionIJK;
+          
+      }
+
+      // first loop over neighbours of atom i except j - part 2/2
+      for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	zeta += cutoffFunctionIK * gtetaFunctionIJK;
+      }
+            
+      // betaZetaPowerIJK 
+      interpolDeltaX= params[ijparam].beta * zeta;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = betaZetaPower[itype][interpolIDX];
+      interpolY2 = betaZetaPower[itype][interpolIDX+1];
+      betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
+      // betaZetaPowerDerivedIJK
+      interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
+      interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
+      betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
+
+      // Forces and virial
+      factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
+
+      f_ij[0] = factor_force_ij * directorCos_ij_x;
+      f_ij[1] = factor_force_ij * directorCos_ij_y;
+      f_ij[2] = factor_force_ij * directorCos_ij_z;
+
+      f[j][0] += f_ij[0];
+      f[j][1] += f_ij[1];
+      f[j][2] += f_ij[2];
+
+      fxtmp -= f_ij[0];
+      fytmp -= f_ij[1];
+      fztmp -= f_ij[2];
+
+      // potential energy
+      evdwl = cutoffFunctionIJ * repulsivePotential 
+	+ cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
+
+      if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0,
+			       -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
+
+      factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
+
+      // second loop over neighbours of atom i except j, forces and virial only - part 1/2
+      for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
+	double dr_ik[3], r_ik, f_ik[3];
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+	  + directorCos_ij_z * directorCos_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
+          
+	factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
+          
+	f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
+	f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
+	f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
+          
+	factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
+	factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
+          
+	f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+	  + factor_2_force3_ik * directorCos_ik_x;
+	f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+	  + factor_2_force3_ik * directorCos_ik_y;
+	f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+	  + factor_2_force3_ik * directorCos_ik_z;
+          
+	f[j][0] -= f_ij[0];
+	f[j][1] -= f_ij[1];
+	f[j][2] -= f_ij[2];
+
+	f[k][0] -= f_ik[0];
+	f[k][1] -= f_ik[1];
+	f[k][2] -= f_ik[2];
+
+	fxtmp += f_ij[0] + f_ik[0];
+	fytmp += f_ij[1] + f_ik[1];
+	fztmp += f_ij[2] + f_ik[2];
+
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
+      }
+            
+      // second loop over neighbours of atom i except j, forces and virial only - part 2/2
+      for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik, f_ik[3];
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+	  + directorCos_ij_z * directorCos_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
+          
+	factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
+          
+	f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
+	f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
+	f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
+          
+	factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
+	factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
+          
+	f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+	  + factor_2_force3_ik * directorCos_ik_x;
+	f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+	  + factor_2_force3_ik * directorCos_ik_y;
+	f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+	  + factor_2_force3_ik * directorCos_ik_z;
+          
+	f[j][0] -= f_ij[0];
+	f[j][1] -= f_ij[1];
+	f[j][2] -= f_ij[2];
+
+	f[k][0] -= f_ik[0];
+	f[k][1] -= f_ik[1];
+	f[k][2] -= f_ik[2];
+
+	fxtmp += f_ij[0] + f_ik[0];
+	fytmp += f_ij[1] + f_ik[1];
+	fztmp += f_ij[2] + f_ik[2];
+
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
+         
+      }
+    } // loop on J
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  } // loop on I
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairTersoffTableOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTersoffTable::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_tersoff_table_omp.h b/src/USER-OMP/pair_tersoff_table_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..ffde58215d30c14d960ed4f276291d88f6e87f9c
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_table_omp.h
@@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/table/omp,PairTersoffTableOMP)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_TABLE_OMP_H
+#define LMP_PAIR_TERSOFF_TABLE_OMP_H
+
+#include "pair_tersoff_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTersoffTableOMP : public PairTersoffTable, public ThrOMP {
+
+ public:
+  PairTersoffTableOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int VFLAG_ATOM>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tri_lj_omp.cpp b/src/USER-OMP/pair_tri_lj_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..97f58a62d553fe8e355048b4f06507e700b33e58
--- /dev/null
+++ b/src/USER-OMP/pair_tri_lj_omp.cpp
@@ -0,0 +1,409 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tri_lj_omp.h"
+#include "math_extra.h"
+#include "atom.h"
+#include "atom_vec_tri.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairTriLJOMP::PairTriLJOMP(LAMMPS *lmp) :
+  PairTriLJ(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTriLJOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int * const tri = atom->tri;
+  const int * const type = atom->type;
+  AtomVecTri::Bonus * const bonus = avec->bonus;
+
+  // grow discrete list if necessary and initialize
+
+  if (nall > nmax) {
+    nmax = nall;
+    memory->destroy(dnum);
+    memory->destroy(dfirst);
+    memory->create(dnum,nall,"pair:dnum");
+    memory->create(dfirst,nall,"pair:dfirst");
+  }
+  memset(dnum,0,nall*sizeof(int));
+  ndiscrete = 0;
+
+  // need to discretize the system ahead of time
+  // until we find a good way to multi-thread it.
+  for (int i = 0; i < nall; ++i) {
+    double dc1[3],dc2[3],dc3[3],p[3][3];
+    
+    if (tri[i] >= 0) {
+      if (dnum[i] == 0) {
+	MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
+	MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
+	MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
+	MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
+	dfirst[i] = ndiscrete;
+	discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3);
+	dnum[i] = ndiscrete - dfirst[i];
+      }
+    }
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairTriLJOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  int ni,nj,npi,npj,ifirst,jfirst;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
+  double dxi,dxj,dyi,dyj,dzi,dzj;
+  double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const tri = atom->tri;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      
+      if (rsq >= cutsq[itype][jtype]) continue;
+      
+      // tri/tri interactions = NxN particles
+      // c1,c2,c3 = corner pts of triangle I or J
+      
+      evdwl = 0.0;
+      if (tri[i] >= 0 && tri[j] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+	  dzi = discrete[ifirst+ni].dz;
+
+	  for (nj = 0; nj < npj; nj++) {
+	    dxj = discrete[jfirst+nj].dx;
+	    dyj = discrete[jfirst+nj].dy;
+	    dzj = discrete[jfirst+nj].dz;
+
+	    xi[0] = x[i][0] + dxi;
+	    xi[1] = x[i][1] + dyi;
+	    xi[2] = x[i][2] + dzi;
+	    xj[0] = x[j][0] + dxj;
+	    xj[1] = x[j][1] + dyj;
+	    xj[2] = x[j][2] + dzj;
+
+	    delx = xi[0] - xj[0];
+	    dely = xi[1] - xj[1];
+	    delz = xi[2] - xj[2];
+	    rsq = delx*delx + dely*dely + delz*delz;
+ 
+	    sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
+	    sig3 = sig*sig*sig;
+	    term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	    term1 = 2.0 * term2 * sig3*sig3;
+	    r2inv = 1.0/rsq;
+	    r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv * (term1*r6inv - term2);
+	    fpair = forcelj*r2inv;
+
+	    if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	    fi[0] += delx*fpair;
+	    fi[1] += dely*fpair;
+	    fi[2] += delz*fpair;
+	    ti[0] += fpair*(dyi*delz - dzi*dely);
+	    ti[1] += fpair*(dzi*delx - dxi*delz);
+	    ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	    if (NEWTON_PAIR || j < nlocal) {
+	      fj[0] -= delx*fpair;
+	      fj[1] -= dely*fpair;
+	      fj[2] -= delz*fpair;
+	      tj[0] -= fpair*(dyj*delz - dzj*dely);
+	      tj[1] -= fpair*(dzj*delx - dxj*delz);
+	      tj[2] -= fpair*(dxj*dely - dyj*delx);
+	    }
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// tri/particle interaction = Nx1 particles
+	// c1,c2,c3 = corner pts of triangle I
+
+      } else if (tri[i] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+	  dzi = discrete[ifirst+ni].dz;
+
+	  xi[0] = x[i][0] + dxi;
+	  xi[1] = x[i][1] + dyi;
+	  xi[2] = x[i][2] + dzi;
+	  xj[0] = x[j][0];
+	  xj[1] = x[j][1];
+	  xj[2] = x[j][2];
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  delz = xi[2] - xj[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+
+	  sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  fi[2] += delz*fpair;
+	  ti[0] += fpair*(dyi*delz - dzi*dely);
+	  ti[1] += fpair*(dzi*delx - dxi*delz);
+	  ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    fj[2] -= delz*fpair;
+	    tj[0] -= fpair*(dyj*delz - dzj*dely);
+	    tj[1] -= fpair*(dzj*delx - dxj*delz);
+	    tj[2] -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// particle/tri interaction = Nx1 particles
+	// c1,c2,c3 = corner pts of triangle J
+
+      } else if (tri[j] >= 0) {
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (nj = 0; nj < npj; nj++) {
+	  dxj = discrete[jfirst+nj].dx;
+	  dyj = discrete[jfirst+nj].dy;
+	  dzj = discrete[jfirst+nj].dz;
+
+	  xi[0] = x[i][0];
+	  xi[1] = x[i][1];
+	  xi[2] = x[i][2];
+	  xj[0] = x[j][0] + dxj;
+	  xj[1] = x[j][1] + dyj;
+	  xj[2] = x[j][2] + dzj;
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  delz = xi[2] - xj[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+
+	  sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+	  
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  fi[2] += delz*fpair;
+	  ti[0] += fpair*(dyi*delz - dzi*dely);
+	  ti[1] += fpair*(dzi*delx - dxi*delz);
+	  ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    fj[2] -= delz*fpair;
+	    tj[0] -= fpair*(dyj*delz - dzj*dely);
+	    tj[1] -= fpair*(dzj*delx - dxj*delz);
+	    tj[2] -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// particle/particle interaction = 1x1 particles
+
+      } else {
+	r2inv = 1.0/rsq;
+	r6inv = r2inv*r2inv*r2inv;
+	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	fpair = forcelj*r2inv;
+
+	if (EFLAG)
+	  evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+      }
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairTriLJOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTriLJ::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_tri_lj_omp.h b/src/USER-OMP/pair_tri_lj_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..d21c562a80a96fac2ab3dccc1693e0971b8f2b2b
--- /dev/null
+++ b/src/USER-OMP/pair_tri_lj_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tri/lj/omp,PairTriLJOMP)
+
+#else
+
+#ifndef LMP_PAIR_TRI_LJ_OMP_H
+#define LMP_PAIR_TRI_LJ_OMP_H
+
+#include "pair_tri_lj.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTriLJOMP : public PairTriLJ, public ThrOMP {
+
+ public:
+  PairTriLJOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..50c0b5422b3d02741cafeb2415bbc4b16df6f44d
--- /dev/null
+++ b/src/USER-OMP/pppm_omp.cpp
@@ -0,0 +1,323 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) :
+  PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
+{
+}
+
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::allocate()
+{
+  PPPM::allocate();
+
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    FFT_SCALAR **rho1d_thr;
+    memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
+    ThrData *thr = fix->get_thr(tid);
+    thr->init_pppm(static_cast<void *>(rho1d_thr));
+  }
+
+  const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
+
+  // reallocate density brick, so it fits our needs
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
+			  nxlo_out,nxhi_out,"pppm:density_brick");
+}
+
+// NOTE: special version of reduce_data for FFT_SCALAR data type.
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// with writing to the array .
+static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    for (int m = ifrom; m < ito; ++m) {
+      for (int n = 1; n < nthreads; ++n) {
+	dall[m] += dall[n*nvals + m];
+	dall[n*nvals + m] = 0.0;
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   free memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::deallocate()
+{
+  PPPM::deallocate();
+  for (int i=0; i < comm->nthreads; ++i) {
+    ThrData * thr = fix->get_thr(i);
+    FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    memory->destroy2d_offset(rho1d_thr,-order/2);
+  }
+}
+
+void PPPMOMP::compute(int eflag, int vflag)
+{
+
+  PPPM::compute(eflag,vflag);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::make_rho()
+{
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = atom->nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int tid = 0;
+    const int ifrom = 0;
+    const int ito = atom->nlocal;
+#endif
+
+    int l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+
+    // set up clear 3d density array
+    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
+    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
+    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
+
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    for (int i = ifrom; i < ito; i++) {
+
+      nx = part2grid[i][0];
+      ny = part2grid[i][1];
+      nz = part2grid[i][2];
+      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      z0 = delvolinv * q[i];
+      for (n = nlower; n <= nupper; n++) {
+	mz = n+nz;
+	y0 = z0*r1d[2][n];
+	for (m = nlower; m <= nupper; m++) {
+	  my = m+ny;
+	  x0 = y0*r1d[1][m];
+	  for (l = nlower; l <= nupper; l++) {
+	    mx = l+nx;
+	    db[mz][my][mx] += x0*r1d[0][l];
+	  }
+	}
+      }
+    }
+#if defined(_OPENMP)
+    // reduce 3d density array
+    if (nthreads > 1) {
+      sync_threads();
+      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
+    }
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::fieldforce()
+{
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+  const double qqrd2e = force->qqrd2e;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = atom->nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int ifrom = 0;
+    const int ito = atom->nlocal;
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    double * const * const f = thr->get_f();
+    FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    
+    int l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    FFT_SCALAR ekx,eky,ekz;
+
+    for (int i = ifrom; i < ito; i++) {
+
+      nx = part2grid[i][0];
+      ny = part2grid[i][1];
+      nz = part2grid[i][2];
+      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      ekx = eky = ekz = ZEROF;
+      for (n = nlower; n <= nupper; n++) {
+	mz = n+nz;
+	z0 = r1d[2][n];
+	for (m = nlower; m <= nupper; m++) {
+	  my = m+ny;
+	  y0 = z0*r1d[1][m];
+	  for (l = nlower; l <= nupper; l++) {
+	    mx = l+nx;
+	    x0 = y0*r1d[0][l];
+	    ekx -= x0*vdx_brick[mz][my][mx];
+	    eky -= x0*vdy_brick[mz][my][mx];
+	    ekz -= x0*vdz_brick[mz][my][mx];
+	  }
+	}
+      }
+
+      // convert E-field to force
+      const double qfactor = qqrd2e*scale*q[i];
+      f[i][0] += qfactor*ekx;
+      f[i][1] += qfactor*eky;
+      f[i][2] += qfactor*ekz;
+    }
+#if defined(_OPENMP)
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d
+   dx,dy,dz = distance of particle from "lower left" grid point 
+------------------------------------------------------------------------- */
+void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
+				const FFT_SCALAR &dy, const FFT_SCALAR &dz)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+
+  for (k = (1-order)/2; k <= order/2; k++) {
+    r1 = r2 = r3 = ZEROF;
+
+    for (l = order-1; l >= 0; l--) {
+      r1 = rho_coeff[l][k] + r1*dx;
+      r2 = rho_coeff[l][k] + r2*dy;
+      r3 = rho_coeff[l][k] + r3*dz;
+    }
+    r1d[0][k] = r1;
+    r1d[1][k] = r2;
+    r1d[2][k] = r3;
+  }
+}
diff --git a/src/USER-OMP/pppm_omp.h b/src/USER-OMP/pppm_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..0701cb33965aaac79dc86fac7f1b1f6985f259be
--- /dev/null
+++ b/src/USER-OMP/pppm_omp.h
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/omp,PPPMOMP)
+
+#else
+
+#ifndef LMP_PPPM_OMP_H
+#define LMP_PPPM_OMP_H
+
+#include "pppm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMOMP : public PPPM, public ThrOMP {
+ public:
+  PPPMOMP(class LAMMPS *, int, char **);
+  virtual ~PPPMOMP () {};
+
+ protected:
+  virtual void allocate();
+  virtual void deallocate();
+  virtual void compute(int, int);
+  virtual void fieldforce();
+  virtual void make_rho();
+  void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
+			 const FFT_SCALAR &, const FFT_SCALAR &);
+//  void compute_rho_coeff();
+//  void slabcorr(int);
+
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pppm_proxy.cpp b/src/USER-OMP/pppm_proxy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1b8a9c5453fa11362df8742094552f879402e58d
--- /dev/null
+++ b/src/USER-OMP/pppm_proxy.cpp
@@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_proxy.h"
+
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PPPMProxy::PPPMProxy(LAMMPS *lmp, int narg, char **arg) :
+  PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::setup_proxy()
+{
+  PPPM::setup();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::compute(int eflag, int vflag)
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag,vflag,thr);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::compute_proxy(int eflag, int vflag)
+{
+  PPPM::compute(eflag,vflag);
+}
+
diff --git a/src/USER-OMP/pppm_proxy.h b/src/USER-OMP/pppm_proxy.h
new file mode 100644
index 0000000000000000000000000000000000000000..e7387569d8d58b00216ca1f4bba57a52a1104975
--- /dev/null
+++ b/src/USER-OMP/pppm_proxy.h
@@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/proxy,PPPMProxy)
+
+#else
+
+#ifndef LMP_PPPM_PROXY_H
+#define LMP_PPPM_PROXY_H
+
+#include "pppm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMProxy : public PPPM, public ThrOMP {
+ public:
+  PPPMProxy(class LAMMPS *, int, char **);
+  virtual ~PPPMProxy () {};
+
+  virtual void compute(int, int);
+  virtual void compute_proxy(int eflag, int vflag);
+
+  // nothing to do in setup
+  virtual void setup() {};
+  virtual void setup_proxy();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pppm_tip4p_proxy.cpp b/src/USER-OMP/pppm_tip4p_proxy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa8d382c27f11c13d4a29b63bcf06207003ffb89
--- /dev/null
+++ b/src/USER-OMP/pppm_tip4p_proxy.cpp
@@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_tip4p_proxy.h"
+
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PPPMTIP4PProxy::PPPMTIP4PProxy(LAMMPS *lmp, int narg, char **arg) :
+  PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::setup_proxy()
+{
+  PPPMTIP4P::setup();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::compute(int eflag, int vflag)
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag,vflag,thr);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::compute_proxy(int eflag, int vflag)
+{
+  PPPMTIP4P::compute(eflag,vflag);
+}
+
diff --git a/src/USER-OMP/pppm_tip4p_proxy.h b/src/USER-OMP/pppm_tip4p_proxy.h
new file mode 100644
index 0000000000000000000000000000000000000000..b69cab2ad6ce671a666e0e8a074713ecb08e8c55
--- /dev/null
+++ b/src/USER-OMP/pppm_tip4p_proxy.h
@@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/tip4p/proxy,PPPMTIP4PProxy)
+
+#else
+
+#ifndef LMP_PPPM_TIP4P_PROXY_H
+#define LMP_PPPM_TIP4P_PROXY_H
+
+#include "pppm_tip4p.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMTIP4PProxy : public PPPMTIP4P, public ThrOMP {
+ public:
+  PPPMTIP4PProxy(class LAMMPS *, int, char **);
+  virtual ~PPPMTIP4PProxy () {};
+
+  virtual void compute(int, int);
+  virtual void compute_proxy(int eflag, int vflag);
+
+  // nothing to do in setup
+  virtual void setup() {};
+  virtual void setup_proxy();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d6830e64bf9274cdf31bdec30f023c4f693b565e
--- /dev/null
+++ b/src/USER-OMP/thr_data.cpp
@@ -0,0 +1,219 @@
+/* -------------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   per-thread data management for LAMMPS
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "thr_data.h"
+
+#include <string.h>
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+
+ThrData::ThrData(int tid) 
+  : _f(NULL), _torque(NULL), _erforce(NULL), _de(NULL), _drho(NULL), _mu(NULL),
+    _lambda(NULL), _rhoB(NULL), _D_values(NULL), _rho(NULL), _fp(NULL),
+    _rho1d(NULL), _tid(tid) 
+{
+  // nothing else to do here.
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::check_tid(int tid)
+{
+  if (tid != _tid)
+    fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_force(int nall, double **f, double **torque,
+			 double *erforce, double *de, double *drho)
+{
+  eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0;
+  memset(virial_pair,0,6*sizeof(double));
+  memset(virial_bond,0,6*sizeof(double));
+  memset(virial_angle,0,6*sizeof(double));
+  memset(virial_dihed,0,6*sizeof(double));
+  memset(virial_imprp,0,6*sizeof(double));
+  memset(virial_kspce,0,6*sizeof(double));
+
+  eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL;
+  vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL;
+
+  _f = f + _tid*nall;
+  memset(&(_f[0][0]),0,nall*3*sizeof(double));
+
+  if (torque) {
+    _torque = torque + _tid*nall;
+    memset(&(_torque[0][0]),0,nall*3*sizeof(double));
+  } else _torque = NULL;
+
+  if (erforce) {
+    _erforce = erforce + _tid*nall;
+    memset(&(_erforce[0]),0,nall*sizeof(double));
+  } else _erforce = NULL;
+
+  if (de) {
+    _de = de + _tid*nall;
+    memset(&(_de[0]),0,nall*sizeof(double));
+  } else _de = NULL;
+
+  if (drho) {
+    _drho = drho + _tid*nall;
+    memset(&(_drho[0]),0,nall*sizeof(double));
+  } else _drho = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   set up and clear out locally managed per atom arrays
+------------------------------------------------------------------------- */
+
+void ThrData::init_eam(int nall, double *rho)
+{
+  _rho = rho + _tid*nall;
+  memset(_rho, 0, nall*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda)
+{
+  init_eam(nall, rho);
+
+  _mu = mu + _tid*nall;
+  _lambda = lambda + _tid*nall;
+  memset(&(_mu[0][0]), 0, nall*3*sizeof(double));
+  memset(&(_lambda[0][0]), 0, nall*6*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values)
+{
+  init_eam(nall, rho);
+
+  _rhoB = rhoB + _tid*nall;
+  _D_values = D_values + _tid*nall;
+  memset(_rhoB, 0, nall*sizeof(double));
+  memset(_D_values, 0, nall*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_eim(int nall, double *rho, double *fp)
+{
+  init_eam(nall, rho);
+
+  _fp = fp + _tid*nall;
+  memset(_fp,0,nall*sizeof(double));
+}
+
+/* ----------------------------------------------------------------------
+   compute global pair virial via summing F dot r over own & ghost atoms
+   at this point, only pairwise forces have been accumulated in atom->f
+------------------------------------------------------------------------- */
+
+void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst)
+{
+
+  // sum over force on all particles including ghosts
+
+  if (nfirst < 0) {
+    int nall = nlocal + nghost;
+    for (int i = 0; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+
+  // neighbor includegroup flag is set
+  // sum over force on initial nfirst particles and ghosts
+
+  } else {
+    int nall = nfirst;
+    for (int i = 0; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+
+    nall = nlocal + nghost;
+    for (int i = nlocal; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double ThrData::memory_usage() 
+{
+  double bytes = (7 + 6*6) * sizeof(double);
+  bytes += 2 * sizeof(double*);
+  bytes += 4 * sizeof(int);
+
+  return bytes;
+}
+
+/* additional helper functions */
+
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// with writing to the array .
+void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    for (int m = ifrom; m < ito; ++m) {
+      for (int n = 1; n < nthreads; ++n) {
+	dall[m] += dall[n*nvals + m];
+	dall[n*nvals + m] = 0.0;
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
diff --git a/src/USER-OMP/thr_data.h b/src/USER-OMP/thr_data.h
new file mode 100644
index 0000000000000000000000000000000000000000..400d5716c5013de05197516d989f7e430ddf8b6f
--- /dev/null
+++ b/src/USER-OMP/thr_data.h
@@ -0,0 +1,132 @@
+/* -*- c++ -*- -------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifndef LMP_THR_DATA_H
+#define LMP_THR_DATA_H
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+namespace LAMMPS_NS {
+
+// per thread data accumulators
+// there should be one instance
+// of this class for each thread.
+class ThrData {
+  friend class FixOMP;
+  friend class ThrOMP;
+
+ public:
+  ThrData(int tid);
+  ~ThrData() {};
+
+  void check_tid(int);    // thread id consistency check
+  int get_tid() const { return _tid; }; // our thread id.
+
+  // erase accumulator contents and hook up force arrays
+  void init_force(int, double **, double **, double *, double *, double *);
+
+  // give access to per-thread offset arrays
+  double **get_f() const { return _f; };
+  double **get_torque() const { return _torque; };
+  double *get_de() const { return _de; };
+  double *get_drho() const { return _drho; };
+
+  // setup and erase per atom arrays
+  void init_adp(int, double *, double **, double **); // ADP (+ EAM)
+  void init_cdeam(int, double *, double *, double *); // CDEAM (+ EAM)
+  void init_eam(int, double *);                       // EAM 
+  void init_eim(int, double *, double *);             // EIM (+ EAM)
+
+  void init_pppm(void *r1d) { _rho1d = r1d; };
+
+  // access methods for arrays that we handle in this class
+  double **get_lambda() const { return _lambda; };
+  double **get_mu() const { return _mu; };
+  double *get_D_values() const { return _D_values; };
+  double *get_fp() const { return _fp; };
+  double *get_rho() const { return _rho; };
+  double *get_rhoB() const { return _rhoB; };
+  void *get_rho1d() const { return _rho1d; };
+
+ private:
+  double eng_vdwl;        // non-bonded non-coulomb energy
+  double eng_coul;        // non-bonded coulomb energy
+  double virial_pair[6];  // virial contribution from non-bonded
+  double *eatom_pair;
+  double **vatom_pair;
+  double eng_bond;        // bond energy
+  double virial_bond[6];  // virial contribution from bonds
+  double *eatom_bond;
+  double **vatom_bond;
+  double eng_angle;       // angle energy
+  double virial_angle[6]; // virial contribution from angles
+  double *eatom_angle;
+  double **vatom_angle;
+  double eng_dihed;       // dihedral energy
+  double virial_dihed[6]; // virial contribution from dihedrals
+  double *eatom_dihed;
+  double **vatom_dihed;
+  double eng_imprp;       // improper energy
+  double virial_imprp[6]; // virial contribution from impropers 
+  double *eatom_imprp;
+  double **vatom_imprp;
+  double eng_kspce;       // kspace energy
+  double virial_kspce[6]; // virial contribution from kspace
+  double *eatom_kspce;
+  double **vatom_kspce;
+
+  // per thread segments of various force or similar arrays
+
+  // these are maintained by atom styles
+  double **_f;
+  double **_torque;
+  double *_erforce;
+  double *_de;
+  double *_drho;
+
+  // these are maintained by individual pair styles
+  double **_mu, **_lambda;   // ADP (+ EAM)
+  double *_rhoB, *_D_values; // CDEAM (+ EAM)
+  double *_rho;              // EAM
+  double *_fp;               // EIM (+ EAM)
+
+  // this is for pppm/omp
+  void *_rho1d;
+
+  // my thread id
+  const int _tid;
+
+ public:
+  // compute global per thread virial contribution from per-thread force
+  void virial_fdotr_compute(double **, int, int, int);
+
+  double memory_usage();
+
+ // disabled default methods
+ private:
+  ThrData() : _tid(-1) {};
+};
+
+////////////////////////////////////////////////////////////////////////
+//  helper functions operating on data replicated for thread support  //
+////////////////////////////////////////////////////////////////////////
+// generic per thread data reduction for continous arrays of nthreads*nmax size
+void data_reduce_thr(double *, int, int, int, int);
+}
+#endif