diff --git a/src/USER-UEF/fix_nh_uef.cpp b/src/USER-UEF/fix_nh_uef.cpp
index cd0b2ba2683742ab1140ce3a41e0e7c506a40a33..bfa45492864ff4708fb0e22167b04b69ca95cbc1 100644
--- a/src/USER-UEF/fix_nh_uef.cpp
+++ b/src/USER-UEF/fix_nh_uef.cpp
@@ -536,10 +536,26 @@ void FixNHUef::pre_exchange()
     rotate_x(rot);
     rotate_f(rot);
 
-    // put all atoms in the new box
-    double **x = atom->x;
+    // this is a generalization of what is done in domain->image_flip(...)
+    int ri[3][3];
+    uefbox->get_inverse_cob(ri);
     imageint *image = atom->image;
     int nlocal = atom->nlocal;
+    for (int i=0; i<nlocal; i++) {
+      int iold[3],inew[3];
+      iold[0] = (image[i] & IMGMASK) - IMGMAX;
+      iold[1] = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+      iold[2] = (image[i] >> IMG2BITS) - IMGMAX;
+      inew[0] = ri[0][0]*iold[0] + ri[0][1]*iold[1] + ri[0][2]*iold[2];
+      inew[1] = ri[1][0]*iold[0] + ri[1][1]*iold[1] + ri[1][2]*iold[2];
+      inew[2] = ri[2][0]*iold[0] + ri[2][1]*iold[1] + ri[2][2]*iold[2];
+      image[i] = ((imageint) (inew[0] + IMGMAX) & IMGMASK) |
+        (((imageint) (inew[1] + IMGMAX) & IMGMASK) << IMGBITS) |
+        (((imageint) (inew[2] + IMGMAX) & IMGMASK) << IMG2BITS);
+    }
+
+    // put all atoms in the new box
+    double **x = atom->x;
     for (int i=0; i<nlocal; i++) domain->remap(x[i],image[i]);
 
     // move atoms to the right processors
diff --git a/src/USER-UEF/uef_utils.cpp b/src/USER-UEF/uef_utils.cpp
index a5498d605f3d28291d91dcc320a4cd41dd999ce2..a2e6cb291e0cb4c7b4ff48e9d1d181d0b7f65e4a 100644
--- a/src/USER-UEF/uef_utils.cpp
+++ b/src/USER-UEF/uef_utils.cpp
@@ -30,47 +30,54 @@ namespace LAMMPS_NS {
 
 UEFBox::UEFBox()
 {
+
   // initial box (also an inverse eigenvector matrix of automorphisms)
+
   double x = 0.327985277605681;
   double y = 0.591009048506103;
   double z = 0.736976229099578;
   l0[0][0]= z; l0[0][1]= y; l0[0][2]= x;
   l0[1][0]=-x; l0[1][1]= z; l0[1][2]=-y;
   l0[2][0]=-y; l0[2][1]= x; l0[2][2]= z;
+
   // spectra of the two automorpisms (log of eigenvalues)
+
   w1[0]=-1.177725211523360;
   w1[1]=-0.441448620566067;
   w1[2]= 1.619173832089425;
   w2[0]= w1[1];
   w2[1]= w1[2];
   w2[2]= w1[0];
+
   // initialize theta
   // strain = w1 * theta1 + w2 * theta2
-  theta[0]=theta[1]=0;
 
+  theta[0]=theta[1]=0;
 
   //set up the initial box l and change of basis matrix r
+
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       l[k][j] = l0[k][j];
       r[j][k]=(j==k);
+      ri[j][k]=(j==k);
     }
 
   // get the initial rotation and upper triangular matrix
+
   rotation_matrix(rot, lrot ,l);
 
   // this is just a way to calculate the automorphisms
   // themselves, which play a minor role in the calculations
   // it's overkill, but only called once
+
   double t1[3][3];
   double t1i[3][3];
   double t2[3][3];
   double t2i[3][3];
   double l0t[3][3];
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       t1[k][j] = exp(w1[k])*l0[k][j];
       t1i[k][j] = exp(-w1[k])*l0[k][j];
       t2[k][j] = exp(w2[k])*l0[k][j];
@@ -82,8 +89,7 @@ UEFBox::UEFBox()
   mul_m2(l0t,t2);
   mul_m2(l0t,t2i);
   for (int k=0; k<3; ++k)
-    for (int j=0; j<3; ++j)
-    {
+    for (int j=0; j<3; ++j) {
       a1[k][j] = round(t1[k][j]);
       a1i[k][j] = round(t1i[k][j]);
       a2[k][j] = round(t2[k][j]);
@@ -92,6 +98,7 @@ UEFBox::UEFBox()
 
   // winv used to transform between
   // strain increments and theta increments
+
   winv[0][0] = w2[1];
   winv[0][1] = -w2[0];
   winv[1][0] = -w1[1];
@@ -102,7 +109,9 @@ UEFBox::UEFBox()
       winv[k][j] /= d;
 }
 
-// get volume-correct r basis in: basis*cbrt(vol) = q*r
+/* ----------------------------------------------------------------------
+   get volume-correct r basis in: basis*cbrt(vol) = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_box(double x[3][3], double v)
 {
   v = cbrtf(v);
@@ -111,7 +120,9 @@ void UEFBox::get_box(double x[3][3], double v)
       x[k][j] = lrot[k][j]*v;
 }
 
-// get rotation matrix q in: basis = q*r
+/* ----------------------------------------------------------------------
+   get rotation matrix q in: basis = q*r
+------------------------------------------------------------------------- */
 void UEFBox::get_rot(double x[3][3])
 {
   for (int k=0;k<3;k++)
@@ -119,20 +130,32 @@ void UEFBox::get_rot(double x[3][3])
       x[k][j]=rot[k][j];
 }
 
-// diagonal, incompressible deformation
+/* ----------------------------------------------------------------------
+   get inverse change of basis matrix
+------------------------------------------------------------------------- */
+void UEFBox::get_inverse_cob(int x[3][3])
+{
+  for (int k=0;k<3;k++)
+    for (int j=0;j<3;j++)
+      x[k][j]=ri[k][j];
+}
+
+/* ----------------------------------------------------------------------
+   apply diagonal, incompressible deformation
+------------------------------------------------------------------------- */
 void UEFBox::step_deform(const double ex, const double ey)
 {
   // increment theta values used in the reduction
+
   theta[0] +=winv[0][0]*ex + winv[0][1]*ey;
   theta[1] +=winv[1][0]*ex + winv[1][1]*ey;
 
-  // deformation of the box. reduce() needs to
-  // be called regularly or calculation will become
-  // unstable
+  // deformation of the box. reduce() needs to be called regularly or 
+  // calculation will become unstable
+
   double eps[3];
   eps[0]=ex; eps[1] = ey; eps[2] = -ex-ey;
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     eps[k] = exp(eps[k]);
     l[k][0] = eps[k]*l[k][0];
     l[k][1] = eps[k]*l[k][1];
@@ -140,68 +163,84 @@ void UEFBox::step_deform(const double ex, const double ey)
   }
   rotation_matrix(rot,lrot, l);
 }
-// reuduce the current basis
+
+/* ----------------------------------------------------------------------
+   reduce the current basis
+------------------------------------------------------------------------- */
 bool UEFBox::reduce()
 {
-  // determine how many times to apply the automorphisms
-  // and find new theta values
+  // determine how many times to apply the automorphisms and find new theta 
+  // values
+
   int f1 = round(theta[0]);
   int f2 = round(theta[1]);
   theta[0] -= f1;
   theta[1] -= f2;
 
-  // store old change or basis matrix to determine if it
-  // changes
+  // store old change or basis matrix to determine if it changes
+
   int r0[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       r0[k][j]=r[k][j];
 
-  // this modifies the old change basis matrix to
-  // handle the case where the automorphism transforms
-  // the box but the reduced basis doesn't change
+  // this modifies the old change basis matrix to handle the case where the 
+  // automorphism transforms the box but the reduced basis doesn't change
   // (r0 should still equal r at the end)
+
   if (f1 > 0) for (int k=0;k<f1;k++) mul_m2 (a1,r0);
   if (f1 < 0) for (int k=0;k<-f1;k++) mul_m2 (a1i,r0);
   if (f2 > 0) for (int k=0;k<f2;k++) mul_m2 (a2,r0);
   if (f2 < 0) for (int k=0;k<-f2;k++) mul_m2 (a2i,r0);
 
   // robust reduction to the box defined by Dobson
-  for (int k=0;k<3;k++)
-  {
+
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
+
   // further reduce the box using greedy reduction and check
   // if it changed from the last step using the change of basis
   // matrices r and r0
-  greedy(l,r);
+
+  greedy(l,r,ri);
+
+  // multiplying the inverse by the old change of basis matrix gives
+  // the inverse of the transformation itself (should be identity if
+  // no reduction takes place). This is used for image flags only.
+
+  mul_m1(ri,r0);
   rotation_matrix(rot,lrot, l);
   return !mat_same(r,r0);
 }
+
+/* ----------------------------------------------------------------------
+   set the strain to a specific value
+------------------------------------------------------------------------- */
 void UEFBox::set_strain(const double ex, const double ey)
 {
-  theta[0]  =winv[0][0]*ex + winv[0][1]*ey;
-  theta[1]  =winv[1][0]*ex + winv[1][1]*ey;
+  theta[0]  = winv[0][0]*ex + winv[0][1]*ey;
+  theta[1]  = winv[1][0]*ex + winv[1][1]*ey;
   theta[0] -= round(theta[0]);
   theta[1] -= round(theta[1]);
 
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     double eps = exp(theta[0]*w1[k]+theta[1]*w2[k]);
     l[k][0] = eps*l0[k][0];
     l[k][1] = eps*l0[k][1];
     l[k][2] = eps*l0[k][2];
   }
-  greedy(l,r);
+  greedy(l,r,ri);
   rotation_matrix(rot,lrot, l);
 }
 
-// this is just qr reduction using householder reflections
-// m is input matrix, q is a rotation, r is upper triangular
-// q*m = r
+/* ----------------------------------------------------------------------
+   qr reduction using householder reflections
+   q*m = r. q is orthogonal. m is input matrix. r is upper triangular
+------------------------------------------------------------------------- */
 void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
 {
   for (int k=0;k<3;k++)
@@ -217,8 +256,7 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
   v[0] /= a; v[1] /= a; v[2] /= a;
   double qt[3][3];
   for (int k=0;k<3;k++)
-    for (int j=0;j<3;j++)
-    {
+    for (int j=0;j<3;j++) {
       qt[k][j] = (k==j) - 2*v[k]*v[j];
       q[k][j]= qt[k][j];
     }
@@ -235,38 +273,42 @@ void rotation_matrix(double q[3][3], double r[3][3], const double m[3][3])
       qt[k][j] = (k==j) - 2*v[k]*v[j];
   mul_m2(qt,r);
   mul_m2(qt,q);
+
   // this makes r have positive diagonals
   // q*m = r <==> (-q)*m = (-r) will hold row-wise
+
   if (r[0][0] < 0){ neg_row(q,0); neg_row(r,0); }
   if (r[1][1] < 0){ neg_row(q,1); neg_row(r,1); }
   if (r[2][2] < 0){ neg_row(q,2); neg_row(r,2); }
 }
 
-
-
-//sort columns in order of increasing length
-void col_sort(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   sort columns of b in order of increasing length
+   mimic column operations on ri and r
+------------------------------------------------------------------------- */
+void col_sort(double b[3][3],int r[3][3],int ri[3][3])
 {
-  if (col_prod(b,0,0)>col_prod(b,1,1))
-  {
+  if (col_prod(b,0,0)>col_prod(b,1,1)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
+    col_swap(ri,0,1);
   }
-  if (col_prod(b,0,0)>col_prod(b,2,2))
-  {
+  if (col_prod(b,0,0)>col_prod(b,2,2)) {
     col_swap(b,0,2);
     col_swap(r,0,2);
+    col_swap(ri,0,2);
   }
-  if (col_prod(b,1,1)>col_prod(b,2,2))
-  {
+  if (col_prod(b,1,1)>col_prod(b,2,2)) {
     col_swap(b,1,2);
     col_swap(r,1,2);
+    col_swap(ri,1,2);
   }
 }
 
-
-// 1-2 reduction (Graham-Schmidt)
-void red12(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   1-2 reduction (Graham-Schmidt)
+------------------------------------------------------------------------- */
+void red12(double b[3][3],int r[3][3],int ri[3][3])
 {
   int y = round(col_prod(b,0,1)/col_prod(b,0,0));
   b[0][1] -= y*b[0][0];
@@ -276,16 +318,23 @@ void red12(double b[3][3],int r[3][3])
   r[0][1] -= y*r[0][0];
   r[1][1] -= y*r[1][0];
   r[2][1] -= y*r[2][0];
-  if (col_prod(b,1,1) < col_prod(b,0,0))
-  {
+
+  ri[0][0] += y*ri[0][1];
+  ri[1][0] += y*ri[1][1];
+  ri[2][0] += y*ri[2][1];
+
+  if (col_prod(b,1,1) < col_prod(b,0,0)) {
     col_swap(b,0,1);
     col_swap(r,0,1);
-    red12(b,r);
+    col_swap(ri,0,1);
+    red12(b,r,ri);
   }
 }
 
-// The Semaev condition for a 3-reduced basis
-void red3(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   Apply the Semaev condition for a 3-reduced basis
+------------------------------------------------------------------------- */
+void red3(double b[3][3], int r[3][3], int ri[3][3])
 {
   double b11 = col_prod(b,0,0);
   double b22 = col_prod(b,1,1);
@@ -304,63 +353,97 @@ void red3(double b[3][3], int r[3][3])
   x1v[0] = floor(y1); x1v[1] = x1v[0]+1;
   x2v[0] = floor(y2); x2v[1] = x2v[0]+1;
   for (int k=0;k<2;k++)
-    for (int j=0;j<2;j++)
-    {
+    for (int j=0;j<2;j++) {
       double a[3];
       a[0] = b[0][2] + x1v[k]*b[0][0] + x2v[j]*b[0][1];
       a[1] = b[1][2] + x1v[k]*b[1][0] + x2v[j]*b[1][1];
       a[2] = b[2][2] + x1v[k]*b[2][0] + x2v[j]*b[2][1];
       double val=a[0]*a[0]+a[1]*a[1]+a[2]*a[2];
-      if (val<min)
-      {
+      if (val<min) {
         min = val;
         x1 = x1v[k];
         x2 = x2v[j];
       }
     }
-  if (x1 || x2)
-  {
+  if (x1 || x2) {
     b[0][2] += x1*b[0][0] + x2*b[0][1];
     b[1][2] += x1*b[1][0] + x2*b[1][1];
     b[2][2] += x1*b[2][0] + x2*b[2][1];
     r[0][2] += x1*r[0][0] + x2*r[0][1];
     r[1][2] += x1*r[1][0] + x2*r[1][1];
     r[2][2] += x1*r[2][0] + x2*r[2][1];
-    greedy_recurse(b,r); // note the recursion step is here
+    ri[0][0] += -x1*ri[0][2];
+    ri[1][0] += -x1*ri[1][2];
+    ri[2][0] += -x1*ri[2][2];
+    ri[0][1] += -x2*ri[0][2];
+    ri[1][1] += -x2*ri[1][2];
+    ri[2][1] += -x2*ri[2][2];
+    greedy_recurse(b,r,ri); // note the recursion step is here
   }
 }
 
-// the meat of the greedy reduction algorithm
-void greedy_recurse(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   the meat of the greedy reduction algorithm
+------------------------------------------------------------------------- */
+void greedy_recurse(double b[3][3], int r[3][3], int ri[3][3])
 {
-  col_sort(b,r);
-  red12(b,r);
-  red3(b,r); // recursive caller
+  col_sort(b,r,ri);
+  red12(b,r,ri);
+  red3(b,r,ri); // recursive caller
 }
 
-// set r (change of basis) to be identity then reduce basis and make it unique
-void greedy(double b[3][3],int r[3][3])
+/* ----------------------------------------------------------------------
+   reduce the basis b. also output the change of basis matrix r and its
+   inverse ri
+------------------------------------------------------------------------- */
+void greedy(double b[3][3],int r[3][3],int ri[3][3])
 {
   r[0][1]=r[0][2]=r[1][0]=r[1][2]=r[2][0]=r[2][1]=0;
   r[0][0]=r[1][1]=r[2][2]=1;
-  greedy_recurse(b,r);
-  make_unique(b,r);
+  ri[0][1]=ri[0][2]=ri[1][0]=ri[1][2]=ri[2][0]=ri[2][1]=0;
+  ri[0][0]=ri[1][1]=ri[2][2]=1;
+  greedy_recurse(b,r,ri);
+  make_unique(b,r,ri);
+  transpose(ri);
 }
 
-// A reduced basis isn't unique. This procedure will make it
-// "more" unique. Degenerate cases are possible, but unlikely
-// with floating point math.
-void make_unique(double b[3][3], int r[3][3])
+/* ----------------------------------------------------------------------
+   A reduced basis isn't unique. This procedure will make it
+   "more" unique. Degenerate cases are possible, but unlikely
+   with floating point math.
+------------------------------------------------------------------------- */
+void make_unique(double b[3][3], int r[3][3], int ri[3][3])
 {
-  if (fabs(b[0][0]) < fabs(b[0][1]))
-  { col_swap(b,0,1); col_swap(r,0,1); }
-  if (fabs(b[0][0]) < fabs(b[0][2]))
-  { col_swap(b,0,2); col_swap(r,0,2); }
-  if (fabs(b[1][1]) < fabs(b[1][2]))
-  { col_swap(b,1,2); col_swap(r,1,2); }
-
-  if (b[0][0] < 0){ neg_col(b,0); neg_col(r,0); }
-  if (b[1][1] < 0){ neg_col(b,1); neg_col(r,1); }
-  if (det(b) < 0){ neg_col(b,2); neg_col(r,2); }
+  if (fabs(b[0][0]) < fabs(b[0][1])) {
+    col_swap(b,0,1);
+    col_swap(r,0,1);
+    col_swap(ri,0,1); 
+  }
+  if (fabs(b[0][0]) < fabs(b[0][2])) {
+    col_swap(b,0,2);
+    col_swap(r,0,2);
+    col_swap(ri,0,2);
+  }
+  if (fabs(b[1][1]) < fabs(b[1][2])) {
+    col_swap(b,1,2);
+    col_swap(r,1,2);
+    col_swap(ri,1,2);
+  }
+
+  if (b[0][0] < 0) {
+    neg_col(b,0);
+    neg_col(r,0);
+    neg_col(ri,0); 
+  }
+  if (b[1][1] < 0) {
+    neg_col(b,1);
+    neg_col(r,1);
+    neg_col(ri,1);
+  }
+  if (det(b) < 0) {
+    neg_col(b,2);
+    neg_col(r,2); 
+    neg_col(ri,2); 
+  }
 }
 }}
diff --git a/src/USER-UEF/uef_utils.h b/src/USER-UEF/uef_utils.h
index a16f6fff1a70f1e5e15b3f993efc702deaaf034f..0a1cfcc9b2ca9f897d9bf4caed142644d24cf3fe 100644
--- a/src/USER-UEF/uef_utils.h
+++ b/src/USER-UEF/uef_utils.h
@@ -27,26 +27,27 @@ class UEFBox
     bool reduce();
     void get_box(double[3][3], double);
     void get_rot(double[3][3]);
+    void get_inverse_cob(int[3][3]);
   private:
     double l0[3][3]; // initial basis
-    double w1[3],w2[3], winv[3][3]; // omega1 and omega2 (spectra of automorphisms)
-    //double edot[3], delta[2];
+    double w1[3],w2[3],winv[3][3];//omega1 and omega2 (spectra of automorphisms)
     double theta[2];
     double l[3][3], rot[3][3], lrot[3][3];
-    int r[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
+    int r[3][3],ri[3][3],a1[3][3],a2[3][3],a1i[3][3],a2i[3][3];
 };
 
-
 // lattice reduction routines
-void greedy(double[3][3],int[3][3]);
-void col_sort(double[3][3],int[3][3]);
-void red12(double[3][3],int[3][3]);
-void greedy_recurse(double[3][3],int[3][3]);
-void red3(double [3][3],int r[3][3]);
-void make_unique(double[3][3],int[3][3]);
+
+void greedy(double[3][3],int[3][3],int[3][3]);
+void col_sort(double[3][3],int[3][3],int[3][3]);
+void red12(double[3][3],int[3][3],int[3][3]);
+void greedy_recurse(double[3][3],int[3][3],int[3][3]);
+void red3(double [3][3],int r[3][3],int[3][3]);
+void make_unique(double[3][3],int[3][3],int[3][3]);
 void rotation_matrix(double[3][3],double[3][3],const double [3][3]);
 
 // A few utility functions for 3x3 arrays
+
 template<typename T>
 T col_prod(T x[3][3], int c1, int c2)
 {
@@ -56,8 +57,7 @@ T col_prod(T x[3][3], int c1, int c2)
 template<typename T>
 void col_swap(T x[3][3], int c1, int c2)
 {
-  for (int k=0;k<3;k++)
-  {
+  for (int k=0;k<3;k++) {
     T t = x[k][c2];
     x[k][c2]=x[k][c1];
     x[k][c1]=t;
@@ -101,9 +101,21 @@ bool mat_same(T x1[3][3], T x2[3][3])
 }
 
 template<typename T>
-void mul_m1(T m1[3][3], const T m2[3][3])
+void transpose(T m[3][3])
 {
   T t[3][3];
+  for (int k=0;k<3;k++)
+    for (int j=k+1;j<3;j++) {
+      T x = m[k][j];
+      m[k][j] = m[j][k];
+      m[j][k] = x;
+    }
+}
+
+template<typename T1,typename T2>
+void mul_m1(T1 m1[3][3], const T2 m2[3][3])
+{
+  T1 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m1[k][j];
@@ -113,10 +125,10 @@ void mul_m1(T m1[3][3], const T m2[3][3])
       m1[k][j] = t[k][0]*m2[0][j] + t[k][1]*m2[1][j] + t[k][2]*m2[2][j];
 }
 
-template<typename T>
-void mul_m2(const T m1[3][3], T m2[3][3])
+template<typename T1, typename T2>
+void mul_m2(const T1 m1[3][3], T2 m2[3][3])
 {
-  T t[3][3];
+  T2 t[3][3];
   for (int k=0;k<3;k++)
     for (int j=0;j<3;j++)
       t[k][j]=m2[k][j];