diff --git a/doc/src/fix_bond_react.txt b/doc/src/fix_bond_react.txt
index 8f71cd14ec7c4a405cd6f752db30ba7b37100e14..9c82e2f0ff2d4af88051c3a5483d2d9fd7d5bb14 100644
--- a/doc/src/fix_bond_react.txt
+++ b/doc/src/fix_bond_react.txt
@@ -36,10 +36,12 @@ react = mandatory argument indicating new reaction specification :l
   template-ID(post-reacted) = ID of a molecule template containing post-reaction topology :l
   map_file = name of file specifying corresponding atom-IDs in the pre- and post-reacted templates :l
   zero or more individual keyword/value pairs may be appended to each react argument :l
-  individual_keyword = {prob} or {stabilize_steps} or {update_edges} :l
+  individual_keyword = {prob} or {max_rxn} or {stabilize_steps} or {update_edges} :l
     {prob} values = fraction seed
       fraction = initiate reaction with this probability if otherwise eligible
       seed = random number seed (positive integer)
+    {max_rxn} value = N
+      N = maximum number of reactions allowed to occur
     {stabilize_steps} value = timesteps
       timesteps = number of timesteps to apply the internally-created "nve/limit"_fix_nve_limit.html fix to reacting atoms
     {update_edges} value = {none} or {charges} or {custom}
@@ -285,7 +287,8 @@ The {prob} keyword can affect whether an eligible reaction actually
 occurs. The fraction setting must be a value between 0.0 and 1.0. A
 uniform random number between 0.0 and 1.0 is generated and the
 eligible reaction only occurs if the random number is less than the
-fraction.
+fraction. Up to N reactions are permitted to occur, as optionally
+specified by the {max_rxn} keyword.
 
 The {stabilize_steps} keyword allows for the specification of how many
 timesteps a reaction site is stabilized before being returned to the
diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h
index c452042cfecbd5730a2a35cfda2c5fc64e5eaecf..998e75fabe80ffa69b54c655ee42917ff47b0fc4 100644
--- a/src/KOKKOS/pair_snap_kokkos_impl.h
+++ b/src/KOKKOS/pair_snap_kokkos_impl.h
@@ -256,7 +256,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
   if (vflag_fdotr) pair_virial_fdotr_compute(this);
 
-
   if (eflag_atom) {
     k_eatom.template modify<DeviceType>();
     k_eatom.template sync<LMPHostType>();
@@ -275,8 +274,8 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
   // free duplicated memory
   if (need_dup) {
-    dup_f            = decltype(dup_f)();
-    dup_vatom        = decltype(dup_vatom)();
+    dup_f     = decltype(dup_f)();
+    dup_vatom = decltype(dup_vatom)();
   }
 }
 
@@ -453,6 +452,13 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
   //t4 += timer.seconds(); timer.reset();
   team.team_barrier();
 
+  if (quadraticflag) {
+    my_sna.compute_bi(team);
+    team.team_barrier();
+    my_sna.copy_bi2bvec(team);
+    team.team_barrier();
+  }
+
   // for neighbors of I within cutoff:
   // compute dUi/drj and dBi/drj
   // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
@@ -472,10 +478,6 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
     my_sna.compute_dbidrj(team);
     //t7 += timer2.seconds(); timer2.reset();
     my_sna.copy_dbi2dbvec(team);
-    if (quadraticflag) {
-      my_sna.compute_bi(team);
-      my_sna.copy_bi2bvec(team);
-    }
 
     Kokkos::single(Kokkos::PerThread(team), [&] (){
     F_FLOAT fij[3];
@@ -536,10 +538,10 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
     a_f(j,1) -= fij[1];
     a_f(j,2) -= fij[2];
 
-    // tally per-atom virial contribution
+    // tally global and per-atom virial contribution
 
     if (EVFLAG) {
-      if (vflag) {
+      if (vflag_either) {
         v_tally_xyz<NEIGHFLAG>(ev,i,j,
           fij[0],fij[1],fij[2],
           -my_sna.rij(jj,0),-my_sna.rij(jj,1),
@@ -554,11 +556,13 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
   // tally energy contribution
 
   if (EVFLAG) {
-    if (eflag) {
+    if (eflag_either) {
 
       if (!quadraticflag) {
         my_sna.compute_bi(team);
+        team.team_barrier();
         my_sna.copy_bi2bvec(team);
+        team.team_barrier();
       }
 
       // E = beta.B + 0.5*B^t.alpha.B
@@ -566,14 +570,15 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
       // coeff[k] = alpha_ii or
       // coeff[k] = alpha_ij = alpha_ji, j != i
 
-      if (team.team_rank() == 0)
-      Kokkos::single(Kokkos::PerThread(team), [&] () {
+      Kokkos::single(Kokkos::PerTeam(team), [&] () {
+
+        // evdwl = energy of atom I, sum over coeffs_k * Bi_k
 
-      // evdwl = energy of atom I, sum over coeffs_k * Bi_k
+        double evdwl = d_coeffi[0];
 
-      double evdwl = d_coeffi[0];
+        // linear contributions
+        // could use thread vector range on this loop
 
-      // linear contributions
         for (int k = 1; k <= ncoeff; k++)
           evdwl += d_coeffi[k]*my_sna.bvec[k-1];
 
@@ -589,11 +594,10 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
             }
           }
         }
-//        ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
-        if (eflag_either) {
-          if (eflag_global) ev.evdwl += evdwl;
-          if (eflag_atom) d_eatom[i] += evdwl;
-        }
+
+        //ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
+        if (eflag_global) ev.evdwl += evdwl;
+        if (eflag_atom) d_eatom[i] += evdwl;
       });
     }
   }
diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h
index 25561fef5d0decf60f4ab6d74d3e8cf20693c9ff..6a19c578293665060cc38a8eec41a10a7747b3c0 100644
--- a/src/KOKKOS/sna_kokkos_impl.h
+++ b/src/KOKKOS/sna_kokkos_impl.h
@@ -327,29 +327,40 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
-  Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxj_max),
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxj_full_max),
       [&] (const int& idx) {
-    const int j1 = idxj(idx).j1;
-    const int j2 = idxj(idx).j2;
-    const int j =  idxj(idx).j;
-    double b_j1_j2_j = 0.0;
+    const int j1 = idxj_full(idx).j1;
+    const int j2 = idxj_full(idx).j2;
+    const int j =  idxj_full(idx).j;
 
-    for(int mb = 0; 2*mb < j; mb++)
-      for(int ma = 0; ma <= j; ma++) {
-        b_j1_j2_j +=
+    const int bound = (j+2)/2;
+    double b_j1_j2_j = 0.0;
+    double b_j1_j2_j_temp = 0.0;
+    Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
+        [&] (const int mbma, double& sum) {
+        //for(int mb = 0; 2*mb <= j; mb++)
+          //for(int ma = 0; ma <= j; ma++) {
+        const int ma = mbma%(j+1);
+        const int mb = mbma/(j+1);
+        if (2*mb == j) return;
+        sum +=
           uarraytot_r(j,ma,mb) * zarray_r(j1,j2,j,mb,ma) +
           uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma);
-      } // end loop over ma, mb
+      },b_j1_j2_j_temp); // end loop over ma, mb
+      b_j1_j2_j += b_j1_j2_j_temp;
 
     // For j even, special treatment for middle column
 
     if (j%2 == 0) {
       const int mb = j/2;
-      for(int ma = 0; ma < mb; ma++) {
-        b_j1_j2_j +=
+      Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
+          [&] (const int ma, double& sum) {
+      //for(int ma = 0; ma < mb; ma++) {
+        sum +=
           uarraytot_r(j,ma,mb) * zarray_r(j1,j2,j,mb,ma) +
           uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma);
-      }
+      },b_j1_j2_j_temp); // end loop over ma
+      b_j1_j2_j += b_j1_j2_j_temp;
 
       const int ma = mb;
       b_j1_j2_j +=
@@ -357,11 +368,13 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
          uarraytot_i(j,ma,mb) * zarray_i(j1,j2,j,mb,ma))*0.5;
     }
 
-    b_j1_j2_j *= 2.0;
-    if (bzero_flag)
-      b_j1_j2_j -= bzero[j];
+    Kokkos::single(Kokkos::PerThread(team), [&] () {
+      b_j1_j2_j *= 2.0;
+      if (bzero_flag)
+        b_j1_j2_j -= bzero[j];
 
-    barray(j1,j2,j) = b_j1_j2_j;
+      barray(j1,j2,j) = b_j1_j2_j;
+    });
   });
       //} // end loop over j
     //} // end loop over j1, j2
@@ -1028,6 +1041,8 @@ void SNAKokkos<DeviceType>::create_team_scratch_arrays(const typename Kokkos::Te
   uarraytot_i_a = uarraytot_i = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim);
   zarray_r = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim);
   zarray_i = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim);
+  bvec = Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>(team.team_scratch(1),ncoeff);
+  barray = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim);
 
   rij = t_sna_2d(team.team_scratch(1),nmax,3);
   rcutij = t_sna_1d(team.team_scratch(1),nmax);
@@ -1046,6 +1061,8 @@ T_INT SNAKokkos<DeviceType>::size_team_scratch_arrays() {
   size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarraytot_i_a
   size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_r
   size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_i
+  size += Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // bvec
+  size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray
 
   size += t_sna_2d::shmem_size(nmax,3); // rij
   size += t_sna_1d::shmem_size(nmax); // rcutij
@@ -1062,8 +1079,6 @@ KOKKOS_INLINE_FUNCTION
 void SNAKokkos<DeviceType>::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
 {
   int jdim = twojmax + 1;
-  bvec = Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>(team.thread_scratch(1),ncoeff);
-  barray = t_sna_3d(team.thread_scratch(1),jdim,jdim,jdim);
 
   dbvec = Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType>(team.thread_scratch(1),ncoeff);
   dbarray = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim);
@@ -1079,8 +1094,6 @@ inline
 T_INT SNAKokkos<DeviceType>::size_thread_scratch_arrays() {
   T_INT size = 0;
   int jdim = twojmax + 1;
-  size += Kokkos::View<double*, Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // bvec
-  size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray
 
   size += Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType>::shmem_size(ncoeff); // dbvec
   size += t_sna_4d::shmem_size(jdim,jdim,jdim); // dbarray
diff --git a/src/MOLECULE/improper_umbrella.cpp b/src/MOLECULE/improper_umbrella.cpp
index 31d7cba54dca313ae2016fe477b9882e0787f515..8ed3d3a84cd6437a6c0894f9d7140c231234aa75 100644
--- a/src/MOLECULE/improper_umbrella.cpp
+++ b/src/MOLECULE/improper_umbrella.cpp
@@ -189,17 +189,17 @@ void ImproperUmbrella::compute(int eflag, int vflag)
     dahy = ary-c*hry;
     dahz = arz-c*hrz;
 
-    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
-    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
-    f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a;
+    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a;
+    f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a;
 
-    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
-    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
-    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a;
+    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a;
+    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a;
 
-    f4[0] = dahx*rhr;
-    f4[1] = dahy*rhr;
-    f4[2] = dahz*rhr;
+    f4[0] = dahx*rhr*a;
+    f4[1] = dahy*rhr*a;
+    f4[2] = dahz*rhr*a;
 
     f1[0] = -(f2[0] + f3[0] + f4[0]);
     f1[1] = -(f2[1] + f3[1] + f4[1]);
@@ -208,27 +208,27 @@ void ImproperUmbrella::compute(int eflag, int vflag)
     // apply force to each of 4 atoms
 
     if (newton_bond || i1 < nlocal) {
-      f[i1][0] += f1[0]*a;
-      f[i1][1] += f1[1]*a;
-      f[i1][2] += f1[2]*a;
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
     }
 
     if (newton_bond || i2 < nlocal) {
-      f[i2][0] += f3[0]*a;
-      f[i2][1] += f3[1]*a;
-      f[i2][2] += f3[2]*a;
+      f[i2][0] += f3[0];
+      f[i2][1] += f3[1];
+      f[i2][2] += f3[2];
     }
 
     if (newton_bond || i3 < nlocal) {
-      f[i3][0] += f2[0]*a;
-      f[i3][1] += f2[1]*a;
-      f[i3][2] += f2[2]*a;
+      f[i3][0] += f2[0];
+      f[i3][1] += f2[1];
+      f[i3][2] += f2[2];
     }
 
     if (newton_bond || i4 < nlocal) {
-      f[i4][0] += f4[0]*a;
-      f[i4][1] += f4[1]*a;
-      f[i4][2] += f4[2]*a;
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
     }
 
     if (evflag) {
@@ -247,7 +247,7 @@ void ImproperUmbrella::compute(int eflag, int vflag)
       vb3y = x[i4][1] - x[i3][1];
       vb3z = x[i4][2] - x[i3][2];
 
-      ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4,
+      ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f2,f4,
                vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
     }
   }
diff --git a/src/USER-MISC/fix_bond_react.cpp b/src/USER-MISC/fix_bond_react.cpp
index e7dc816b8b73c3e4589dae93e5da04f67d32eecd..4b15f50d7d7d5fa0bcfc05d45f2c872d29134e14 100644
--- a/src/USER-MISC/fix_bond_react.cpp
+++ b/src/USER-MISC/fix_bond_react.cpp
@@ -161,6 +161,9 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   memory->create(unreacted_mol,nreacts,"bond/react:unreacted_mol");
   memory->create(reacted_mol,nreacts,"bond/react:reacted_mol");
   memory->create(fraction,nreacts,"bond/react:fraction");
+  memory->create(max_rxn,nreacts,"bond/react:max_rxn");
+  memory->create(nlocalskips,nreacts,"bond/react:nlocalskips");
+  memory->create(nghostlyskips,nreacts,"bond/react:nghostlyskips");
   memory->create(seed,nreacts,"bond/react:seed");
   memory->create(limit_duration,nreacts,"bond/react:limit_duration");
   memory->create(stabilize_steps_flag,nreacts,"bond/react:stabilize_steps_flag");
@@ -179,6 +182,7 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   for (int i = 0; i < nreacts; i++) {
     fraction[i] = 1;
     seed[i] = 12345;
+    max_rxn[i] = INT_MAX;
     stabilize_steps_flag[i] = 0;
     update_edges_flag[i] = 0;
     // set default limit duration to 60 timesteps
@@ -244,6 +248,13 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
         if (seed[rxn] <= 0) error->all(FLERR,"Illegal fix bond/react command: "
                                        "probability seed must be positive");
         iarg += 3;
+      } else if (strcmp(arg[iarg],"max_rxn") == 0) {
+	      if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/react command: "
+	      			                        "'max_rxn' has too few arguments");
+	      max_rxn[rxn] = force->inumeric(FLERR,arg[iarg+1]);
+	      if (max_rxn[rxn] < 0) error->all(FLERR,"Illegal fix bond/react command: "
+	      				                         "'max_rxn' cannot be negative");
+	      iarg += 2;
       } else if (strcmp(arg[iarg],"stabilize_steps") == 0) {
         if (stabilization_flag == 0) error->all(FLERR,"Stabilize_steps keyword "
                                                 "used without stabilization keyword");
@@ -379,9 +390,6 @@ FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
 
 FixBondReact::~FixBondReact()
 {
-  // unregister callbacks to this fix from Atom class
-  atom->delete_callback(id,0);
-
   for (int i = 0; i < nreacts; i++) {
     delete random[i];
   }
@@ -396,6 +404,7 @@ FixBondReact::~FixBondReact()
   memory->destroy(edge);
   memory->destroy(equivalences);
   memory->destroy(reverse_equiv);
+  memory->destroy(landlocked_atoms);
   memory->destroy(custom_edges);
   memory->destroy(delete_atoms);
 
@@ -405,6 +414,9 @@ FixBondReact::~FixBondReact()
   memory->destroy(reacted_mol);
   memory->destroy(fraction);
   memory->destroy(seed);
+  memory->destroy(max_rxn);
+  memory->destroy(nlocalskips);
+  memory->destroy(nghostlyskips);
   memory->destroy(limit_duration);
   memory->destroy(stabilize_steps_flag);
   memory->destroy(update_edges_flag);
@@ -434,7 +446,6 @@ FixBondReact::~FixBondReact()
     memory->destroy(restore);
     memory->destroy(glove);
     memory->destroy(pioneers);
-    memory->destroy(landlocked_atoms);
     memory->destroy(local_mega_glove);
     memory->destroy(ghostly_mega_glove);
   }
@@ -452,7 +463,7 @@ FixBondReact::~FixBondReact()
     delete [] id_fix3;
   }
 
-  if (id_fix2 == NULL && modify->nfix) modify->delete_fix(id_fix2);
+  if (id_fix2 && modify->nfix) modify->delete_fix(id_fix2);
   delete [] id_fix2;
 
   delete [] statted_id;
@@ -677,6 +688,8 @@ void FixBondReact::post_integrate()
     reaction_count[i] = 0;
     local_rxn_count[i] = 0;
     ghostly_rxn_count[i] = 0;
+    nlocalskips[i] = 0;
+    nghostlyskips[i] = 0;
   }
 
   if (nevery_check) {
@@ -748,6 +761,7 @@ void FixBondReact::post_integrate()
 
   int j;
   for (rxnID = 0; rxnID < nreacts; rxnID++) {
+    if (max_rxn[rxnID] <= reaction_count_total[rxnID]) continue;
     for (int ii = 0; ii < nall; ii++) {
       partner[ii] = 0;
       finalpartner[ii] = 0;
@@ -1145,14 +1159,43 @@ void FixBondReact::superimpose_algorithm()
 
   MPI_Allreduce(&local_rxn_count[0],&reaction_count[0],nreacts,MPI_INT,MPI_SUM,world);
 
-  for (int i = 0; i < nreacts; i++)
-    reaction_count_total[i] += reaction_count[i];
-
-  // this assumes compute_vector is called from process 0
-  // ...so doesn't bother to bcast ghostly_rxn_count
   if (me == 0)
     for (int i = 0; i < nreacts; i++)
-      reaction_count_total[i] += ghostly_rxn_count[i];
+      reaction_count_total[i] += reaction_count[i] + ghostly_rxn_count[i];
+
+  MPI_Bcast(&reaction_count_total[0], nreacts, MPI_INT, 0, world);
+
+  // check if we overstepped our reaction limit
+  for (int i = 0; i < nreacts; i++) {
+    if (reaction_count_total[i] > max_rxn[i]) {
+      // let's randomly choose rxns to skip, unbiasedly from local and ghostly
+      int local_rxncounts[nprocs];
+      int all_localskips[nprocs];
+      MPI_Gather(&local_rxn_count[i],1,MPI_INT,local_rxncounts,1,MPI_INT,0,world);
+      if (me == 0) {
+        int overstep = reaction_count_total[i] - max_rxn[i];
+        int delta_rxn = reaction_count[i] + ghostly_rxn_count[i];
+        int rxn_by_proc[delta_rxn];
+        for (int j = 0; j < delta_rxn; j++)
+          rxn_by_proc[j] = -1; // corresponds to ghostly
+        int itemp = 0;
+        for (int j = 0; j < nprocs; j++)
+          for (int k = 0; k < local_rxn_count[j]; k++)
+            rxn_by_proc[itemp++] = j;
+        std::random_shuffle(&rxn_by_proc[0],&rxn_by_proc[delta_rxn]);
+        for (int j = 0; j < nprocs; j++)
+          all_localskips[j] = 0;
+        nghostlyskips[i] = 0;
+        for (int j = 0; j < overstep; j++) {
+          if (rxn_by_proc[j] == -1) nghostlyskips[i]++;
+          else all_localskips[rxn_by_proc[j]]++;
+        }
+      }
+      reaction_count_total[i] = max_rxn[i];
+      MPI_Scatter(&all_localskips[0],1,MPI_INT,&nlocalskips[i],1,MPI_INT,0,world);
+      MPI_Bcast(&nghostlyskips[i],1,MPI_INT,0,world);
+    }
+  }
 
   // this updates topology next step
   next_reneighbor = update->ntimestep;
@@ -1948,19 +1991,19 @@ void FixBondReact::glove_ghostcheck()
   // 'ghosts of another' indication taken from comm->sendlist
 
   int ghostly = 0;
-  if (comm->style == 0) {
-    for (int i = 0; i < onemol->natoms; i++) {
-      int ilocal = atom->map(glove[i][1]);
-      if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) {
-        ghostly = 1;
-        break;
+  #if !defined(MPI_STUBS)
+    if (comm->style == 0) {
+      for (int i = 0; i < onemol->natoms; i++) {
+        int ilocal = atom->map(glove[i][1]);
+        if (ilocal >= atom->nlocal || localsendlist[ilocal] == 1) {
+          ghostly = 1;
+          break;
+        }
       }
-    }
-  } else {
-    #if !defined(MPI_STUBS)
+    } else {
       ghostly = 1;
-    #endif
-  }
+    }
+  #endif
 
   if (ghostly == 1) {
     ghostly_mega_glove[0][ghostly_num_mega] = rxnID;
@@ -2095,18 +2138,26 @@ void FixBondReact::update_everything()
   memory->create(update_mega_glove,max_natoms+1,MAX(local_num_mega,global_megasize),"bond/react:update_mega_glove");
 
   for (int pass = 0; pass < 2; pass++) {
-
+    update_num_mega = 0;
+    int iskip[nreacts];
+    for (int i = 0; i < nreacts; i++) iskip[i] = 0;
     if (pass == 0) {
-      update_num_mega = local_num_mega;
-      for (int i = 0; i < update_num_mega; i++) {
+      for (int i = 0; i < local_num_mega; i++) {
+        rxnID = local_mega_glove[0][i];
+        // reactions already shuffled from dedup procedure, so can skip first N
+        if (iskip[rxnID]++ < nlocalskips[rxnID]) continue;
         for (int j = 0; j < max_natoms+1; j++)
-          update_mega_glove[j][i] = local_mega_glove[j][i];
+          update_mega_glove[j][update_num_mega] = local_mega_glove[j][i];
+        update_num_mega++;
       }
     } else if (pass == 1) {
-      update_num_mega = global_megasize;
       for (int i = 0; i < global_megasize; i++) {
+        rxnID = global_mega_glove[0][i];
+        // reactions already shuffled from dedup procedure, so can skip first N
+        if (iskip[rxnID]++ < nghostlyskips[rxnID]) continue;
         for (int j = 0; j < max_natoms+1; j++)
-          update_mega_glove[j][i] = global_mega_glove[j][i];
+          update_mega_glove[j][update_num_mega] = global_mega_glove[j][i];
+        update_num_mega++;
       }
     }
 
diff --git a/src/USER-MISC/fix_bond_react.h b/src/USER-MISC/fix_bond_react.h
index d6e7b785e7f8662d8d4eca52d5171d3a7a77be3c..fffdfe43694929d4774c68523e22a39ecb60a0fb 100644
--- a/src/USER-MISC/fix_bond_react.h
+++ b/src/USER-MISC/fix_bond_react.h
@@ -55,6 +55,7 @@ class FixBondReact : public Fix {
   int *iatomtype,*jatomtype;
   int *seed;
   double **cutsq,*fraction;
+  int *max_rxn,*nlocalskips,*nghostlyskips;
   tagint lastcheck;
   int stabilization_flag;
   int custom_exclude_flag;
diff --git a/src/USER-MISC/improper_fourier.cpp b/src/USER-MISC/improper_fourier.cpp
index 11478d08eab135f70baa9ab6d69a762b4a302672..927478fa1a9245f640e40562d84033001bd2afba 100644
--- a/src/USER-MISC/improper_fourier.cpp
+++ b/src/USER-MISC/improper_fourier.cpp
@@ -206,17 +206,17 @@ void ImproperFourier::addone(const int &i1,const int &i2,const int &i3,const int
   dahy = ary-c*hry;
   dahz = arz-c*hrz;
 
-  f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
-  f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
-  f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+  f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a;
+  f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a;
+  f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a;
 
-  f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
-  f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
-  f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+  f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a;
+  f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a;
+  f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a;
 
-  f4[0] = dahx*rhr;
-  f4[1] = dahy*rhr;
-  f4[2] = dahz*rhr;
+  f4[0] = dahx*rhr*a;
+  f4[1] = dahy*rhr*a;
+  f4[2] = dahz*rhr*a;
 
   f1[0] = -(f2[0] + f3[0] + f4[0]);
   f1[1] = -(f2[1] + f3[1] + f4[1]);
@@ -225,32 +225,32 @@ void ImproperFourier::addone(const int &i1,const int &i2,const int &i3,const int
   // apply force to each of 4 atoms
 
   if (newton_bond || i1 < nlocal) {
-    f[i1][0] += f1[0]*a;
-    f[i1][1] += f1[1]*a;
-    f[i1][2] += f1[2]*a;
+    f[i1][0] += f1[0];
+    f[i1][1] += f1[1];
+    f[i1][2] += f1[2];
   }
 
   if (newton_bond || i2 < nlocal) {
-    f[i2][0] += f3[0]*a;
-    f[i2][1] += f3[1]*a;
-    f[i2][2] += f3[2]*a;
+    f[i2][0] += f3[0];
+    f[i2][1] += f3[1];
+    f[i2][2] += f3[2];
   }
 
   if (newton_bond || i3 < nlocal) {
-    f[i3][0] += f2[0]*a;
-    f[i3][1] += f2[1]*a;
-    f[i3][2] += f2[2]*a;
+    f[i3][0] += f2[0];
+    f[i3][1] += f2[1];
+    f[i3][2] += f2[2];
   }
 
   if (newton_bond || i4 < nlocal) {
-    f[i4][0] += f4[0]*a;
-    f[i4][1] += f4[1]*a;
-    f[i4][2] += f4[2]*a;
+    f[i4][0] += f4[0];
+    f[i4][1] += f4[1];
+    f[i4][2] += f4[2];
   }
 
   if (evflag)
-    ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4,
-             vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
+      ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f2,f4,
+               -vb1x,-vb1y,-vb1z,vb2x-vb1x,vb2y-vb1y,vb2z-vb1z,vb3x-vb2x,vb3y-vb2y,vb3z-vb2z);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/USER-OMP/improper_fourier_omp.cpp b/src/USER-OMP/improper_fourier_omp.cpp
index aed04003a5eff70550f4d98830145a7557cc22b5..77dd36b64f62d014bf3335178585bed290794b37 100644
--- a/src/USER-OMP/improper_fourier_omp.cpp
+++ b/src/USER-OMP/improper_fourier_omp.cpp
@@ -239,17 +239,17 @@ void ImproperFourierOMP::add1_thr(const int i1,const int i2,
   dahy = ary-c*hry;
   dahz = arz-c*hrz;
 
-  f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
-  f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
-  f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+  f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a;
+  f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a;
+  f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a;
 
-  f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
-  f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
-  f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+  f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a;
+  f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a;
+  f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a;
 
-  f4[0] = dahx*rhr;
-  f4[1] = dahy*rhr;
-  f4[2] = dahz*rhr;
+  f4[0] = dahx*rhr*a;
+  f4[1] = dahy*rhr*a;
+  f4[2] = dahz*rhr*a;
 
   f1[0] = -(f2[0] + f3[0] + f4[0]);
   f1[1] = -(f2[1] + f3[1] + f4[1]);
@@ -258,30 +258,31 @@ void ImproperFourierOMP::add1_thr(const int i1,const int i2,
   // apply force to each of 4 atoms
 
   if (NEWTON_BOND || i1 < nlocal) {
-    f[i1][0] += f1[0]*a;
-    f[i1][1] += f1[1]*a;
-    f[i1][2] += f1[2]*a;
+    f[i1][0] += f1[0];
+    f[i1][1] += f1[1];
+    f[i1][2] += f1[2];
   }
 
   if (NEWTON_BOND || i2 < nlocal) {
-    f[i2][0] += f3[0]*a;
-    f[i2][1] += f3[1]*a;
-    f[i2][2] += f3[2]*a;
+    f[i2][0] += f3[0];
+    f[i2][1] += f3[1];
+    f[i2][2] += f3[2];
   }
 
   if (NEWTON_BOND || i3 < nlocal) {
-    f[i3][0] += f2[0]*a;
-    f[i3][1] += f2[1]*a;
-    f[i3][2] += f2[2]*a;
+    f[i3][0] += f2[0];
+    f[i3][1] += f2[1];
+    f[i3][2] += f2[2];
   }
 
   if (NEWTON_BOND || i4 < nlocal) {
-    f[i4][0] += f4[0]*a;
-    f[i4][1] += f4[1]*a;
-    f[i4][2] += f4[2]*a;
+    f[i4][0] += f4[0];
+    f[i4][1] += f4[1];
+    f[i4][2] += f4[2];
   }
 
   if (EVFLAG)
-    ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
-                 vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+    ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f2,f4,
+                 -vb1x,-vb1y,-vb1z,vb2x-vb1x,vb2y-vb1y,vb2z-vb1z,vb3x-vb2x,vb3y-vb2y,vb3z-vb2z,thr);
+
 }
diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp
index dc11f24a4de084b07b666c08b76271d135218115..69d41176c64287c55a3663f4db259df03bd5b319 100644
--- a/src/USER-OMP/improper_umbrella_omp.cpp
+++ b/src/USER-OMP/improper_umbrella_omp.cpp
@@ -212,17 +212,17 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr)
     dahy = ary-c*hry;
     dahz = arz-c*hrz;
 
-    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
-    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
-    f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar*a;
+    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar*a;
+    f2[2] = (dhax*vb1y - dhay*vb1x)*rar*a;
 
-    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
-    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
-    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar*a;
+    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar*a;
+    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar*a;
 
-    f4[0] = dahx*rhr;
-    f4[1] = dahy*rhr;
-    f4[2] = dahz*rhr;
+    f4[0] = dahx*rhr*a;
+    f4[1] = dahy*rhr*a;
+    f4[2] = dahz*rhr*a;
 
     f1[0] = -(f2[0] + f3[0] + f4[0]);
     f1[1] = -(f2[1] + f3[1] + f4[1]);
@@ -231,27 +231,27 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr)
     // apply force to each of 4 atoms
 
     if (NEWTON_BOND || i1 < nlocal) {
-      f[i1].x += f1[0]*a;
-      f[i1].y += f1[1]*a;
-      f[i1].z += f1[2]*a;
+      f[i1].x += f1[0];
+      f[i1].y += f1[1];
+      f[i1].z += f1[2];
     }
 
     if (NEWTON_BOND || i2 < nlocal) {
-      f[i2].x += f3[0]*a;
-      f[i2].y += f3[1]*a;
-      f[i2].z += f3[2]*a;
+      f[i2].x += f3[0];
+      f[i2].y += f3[1];
+      f[i2].z += f3[2];
     }
 
     if (NEWTON_BOND || i3 < nlocal) {
-      f[i3].x += f2[0]*a;
-      f[i3].y += f2[1]*a;
-      f[i3].z += f2[2]*a;
+      f[i3].x += f2[0];
+      f[i3].y += f2[1];
+      f[i3].z += f2[2];
     }
 
     if (NEWTON_BOND || i4 < nlocal) {
-      f[i4].x += f4[0]*a;
-      f[i4].y += f4[1]*a;
-      f[i4].z += f4[2]*a;
+      f[i4].x += f4[0];
+      f[i4].y += f4[1];
+      f[i4].z += f4[2];
     }
 
     if (EVFLAG) {
@@ -270,7 +270,7 @@ void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr)
       vb3y = x[i4].y - x[i3].y;
       vb3z = x[i4].z - x[i3].z;
 
-      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f2,f4,
                    vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
     }
   }