diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp
index 346077e07119c4b3f5f45eb535f5ca698083ce30..8dd22022d8f7272d342cf6e86f36700e9086adf6 100644
--- a/src/KOKKOS/angle_charmm_kokkos.cpp
+++ b/src/KOKKOS/angle_charmm_kokkos.cpp
@@ -111,7 +111,6 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleCharmmCompute<0,0> >(0,nanglelist),*this);
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) energy += ev.evdwl;
   if (vflag_global) {
diff --git a/src/KOKKOS/bond_class2_kokkos.cpp b/src/KOKKOS/bond_class2_kokkos.cpp
index b01af92b5fddfdecf00ab2d00ec709a14c6a3741..b3c11c9a06c468be08f9ce2b2a29919c949ff475 100644
--- a/src/KOKKOS/bond_class2_kokkos.cpp
+++ b/src/KOKKOS/bond_class2_kokkos.cpp
@@ -110,7 +110,6 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondClass2Compute<0,0> >(0,nbondlist),*this);
     }
   }
-  //DeviceType::fence();
 
   if (eflag_global) energy += ev.evdwl;
   if (vflag_global) {
diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp
index cfc37bfa9f6e26afb65d97e985580d485bb65a91..025838340bbb6945b24e51c8bd185c72b1afa7d1 100644
--- a/src/KOKKOS/bond_fene_kokkos.cpp
+++ b/src/KOKKOS/bond_fene_kokkos.cpp
@@ -125,7 +125,6 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondFENECompute<0,0> >(0,nbondlist),*this);
     }
   }
-  DeviceType::fence();
 
   k_warning_flag.template modify<DeviceType>();
   k_warning_flag.template sync<LMPHostType>();
diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp
index 408f59c5632b87c6177f41d7b623dbc852370eee..da45c70d6c7739f08ad0fb559e6d9d7f768437c4 100644
--- a/src/KOKKOS/bond_harmonic_kokkos.cpp
+++ b/src/KOKKOS/bond_harmonic_kokkos.cpp
@@ -111,7 +111,6 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this);
     }
   }
-  //DeviceType::fence();
 
   if (eflag_global) energy += ev.evdwl;
   if (vflag_global) {
diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp
index 6a24591d6cbb66842df6cddefeef873f8f78203b..2ea67a1fb135a695a1e37b3c5d3e0de2ae82762e 100644
--- a/src/KOKKOS/compute_temp_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_kokkos.cpp
@@ -63,7 +63,6 @@ double ComputeTempKokkos<DeviceType>::compute_scalar()
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk);
   else
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk);
-  DeviceType::fence();
   copymode = 0;
 
   t = t_kk.t0; // could make this more efficient
@@ -118,7 +117,6 @@ void ComputeTempKokkos<DeviceType>::compute_vector()
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk);
   else
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk);
-  DeviceType::fence();
   copymode = 0;
 
   t[0] = t_kk.t0;
diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp
index ee9e3d1244b0c4c22bddee191751471803c4a08e..a8a8aade604a001cc8e2499b2be9c06f74f500af 100644
--- a/src/KOKKOS/dihedral_charmm_kokkos.cpp
+++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp
@@ -132,7 +132,6 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmCompute<0,0> >(0,ndihedrallist),*this);
     }
   }
-  DeviceType::fence();
 
   // error check
 
diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp
index edfd1b3395ba909aec568fdd86d5867665efcad1..89e42c6f836a1f3fa9e502875332bffb8bb4cd08 100644
--- a/src/KOKKOS/dihedral_class2_kokkos.cpp
+++ b/src/KOKKOS/dihedral_class2_kokkos.cpp
@@ -159,7 +159,6 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralClass2Compute<0,0> >(0,ndihedrallist),*this);
     }
   }
-  DeviceType::fence();
 
   // error check
 
diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp
index 8e222ad86083e762df7e698be81b6e080d42fcbf..e37d4d2ef5636b867e927da9421b06f959d0b093 100644
--- a/src/KOKKOS/dihedral_opls_kokkos.cpp
+++ b/src/KOKKOS/dihedral_opls_kokkos.cpp
@@ -121,7 +121,6 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralOPLSCompute<0,0> >(0,ndihedrallist),*this);
     }
   }
-  DeviceType::fence();
 
   // error check
 
diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp
index 0572dcedbecbfab7634285d2dff7b73ac967e008..fb0f329a91bb398bad243493e9d834ac3ab1aa8d 100644
--- a/src/KOKKOS/fix_langevin_kokkos.cpp
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@@ -506,7 +506,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
 
-  DeviceType::fence();
 
   if(tbiasflag == BIAS){
     atomKK->sync(temperature->execution_space,temperature->datamask_read);
@@ -531,7 +530,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
     // set total force zero in parallel on the device
     FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
     Kokkos::parallel_for(nlocal,zero_functor);
-    DeviceType::fence();
   }
   // f is modified by both post_force and zero_force functors
   atomKK->modified(execution_space,datamask_modify);
@@ -726,7 +724,6 @@ double FixLangevinKokkos<DeviceType>::compute_scalar()
     k_flangevin.template sync<DeviceType>();
     FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
     Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep);
-    DeviceType::fence();
     energy = 0.5*energy_onestep*update->dt;
   }
 
@@ -770,7 +767,6 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
   k_flangevin.template sync<DeviceType>();
   FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
   Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
-  DeviceType::fence();
 
   energy += energy_onestep*update->dt;
 }
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index 2b55259365d5100ced33316ea81f272b49bdb0cc..fb03bf68c6308163a2126a0e9f9a2ec82f9139c2 100644
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -495,7 +495,6 @@ void FixNHKokkos<DeviceType>::nh_v_press()
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<1> >(0,nlocal),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<0> >(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 
   atomKK->modified(execution_space,V_MASK);
@@ -550,7 +549,6 @@ void FixNHKokkos<DeviceType>::nve_v()
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -595,7 +593,6 @@ void FixNHKokkos<DeviceType>::nve_x()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_x>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -631,7 +628,6 @@ void FixNHKokkos<DeviceType>::nh_v_temp()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_temp>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 
   atomKK->modified(execution_space,V_MASK);
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
index 4c041f85b0794a2f63ff2d803c27ed84db275b61..eb41443bab6c410044ad6a2f32a0cd7c3ee2feb0 100644
--- a/src/KOKKOS/fix_nve_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -76,7 +76,6 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
     FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
     Kokkos::parallel_for(nlocal,functor);
   }
-  DeviceType::fence();
 }
 
 template<class DeviceType>
@@ -133,7 +132,6 @@ void FixNVEKokkos<DeviceType>::final_integrate()
     FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
     Kokkos::parallel_for(nlocal,functor);
   }
-  DeviceType::fence();
 
   // debug
   //atomKK->sync(Host,datamask_read);
diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
index 2e46b85fd2e7800b1883d0925f1285f061edc12e..5cafbd2ef3d2b1cf0626c7f736e33f4c02ec2e8f 100644
--- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
@@ -234,12 +234,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
   // compute_H
   FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this);
   Kokkos::parallel_scan(inum,computeH_functor);
-  DeviceType::fence();
 
   // init_matvec
   FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this);
   Kokkos::parallel_for(inum,matvec_functor);
-  DeviceType::fence();
 
   // comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 2;
@@ -259,15 +257,12 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
 
   // 1st cg solve over b_s, s
   cg_solve1();
-  DeviceType::fence();
 
   // 2nd cg solve over b_t, t
   cg_solve2();
-  DeviceType::fence();
 
   // calculate_Q();
   calculate_q();
-  DeviceType::fence();
 
   copymode = 0;
 
@@ -354,7 +349,6 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array()
   const int ignum = atom->nlocal + atom->nghost;
   FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this);
   Kokkos::parallel_for(ignum,zero_functor);
-  DeviceType::fence();
 
 }
 /* ---------------------------------------------------------------------- */
@@ -499,10 +493,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
   // sparse_matvec( &H, x, q );
   FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this);
   Kokkos::parallel_for(inum,sparse12_functor);
-  DeviceType::fence();
   if (neighflag != FULL) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
-    DeviceType::fence();
     if (neighflag == HALF) {
       FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this);
       Kokkos::parallel_for(inum,sparse13_functor);
@@ -513,7 +505,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
   } else {
     Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this);
   }
-  DeviceType::fence();
 
   if (neighflag != FULL) {
     k_o.template modify<DeviceType>();
@@ -529,21 +520,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
   F_FLOAT my_norm = 0.0;
   FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this);
   Kokkos::parallel_reduce(inum,norm1_functor,my_norm);
-  DeviceType::fence();
   F_FLOAT norm_sqr = 0.0;
   MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   b_norm = sqrt(norm_sqr);
-  DeviceType::fence();
 
   // sig_new = parallel_dot( r, d, nn);
   F_FLOAT my_dot = 0.0;
   FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
   Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
-  DeviceType::fence();
   F_FLOAT dot_sqr = 0.0;
   MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   F_FLOAT sig_new = dot_sqr;
-  DeviceType::fence();
 
   int loop;
   const int loopmax = 200;
@@ -560,10 +547,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
     // sparse_matvec( &H, d, q );
     FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
     Kokkos::parallel_for(inum,sparse22_functor);
-    DeviceType::fence();
     if (neighflag != FULL) {
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
-      DeviceType::fence();
       if (neighflag == HALF) {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
@@ -574,7 +559,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
     } else {
       Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
     }
-    DeviceType::fence();
 
 
     if (neighflag != FULL) {
@@ -589,7 +573,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
     Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
-    DeviceType::fence();
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     tmp = dot_sqr;
 
@@ -602,12 +585,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this);
     Kokkos::parallel_for(inum,precon1_functor);
-    DeviceType::fence();
     // preconditioning: p[j] = r[j] * Hdia_inv[j];
     // sig_new = parallel_dot( r, p, nn);
     FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
     Kokkos::parallel_reduce(inum,precon_functor,my_dot);
-    DeviceType::fence();
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     sig_new = dot_sqr;
 
@@ -616,7 +597,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
     // vector_sum( d, 1., p, beta, d, nn );
     FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
     Kokkos::parallel_for(inum,vecsum2_functor);
-    DeviceType::fence();
   }
 
   if (loop >= loopmax && comm->me == 0) {
@@ -644,10 +624,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
   // sparse_matvec( &H, x, q );
   FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this);
   Kokkos::parallel_for(inum,sparse32_functor);
-  DeviceType::fence();
   if (neighflag != FULL) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
-    DeviceType::fence();
     if (neighflag == HALF) {
       FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this);
       Kokkos::parallel_for(inum,sparse33_functor);
@@ -658,7 +636,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
   } else {
     Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this);
   }
-  DeviceType::fence();
 
   if (neighflag != FULL) {
     k_o.template modify<DeviceType>();
@@ -674,21 +651,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
   F_FLOAT my_norm = 0.0;
   FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this);
   Kokkos::parallel_reduce(inum,norm2_functor,my_norm);
-  DeviceType::fence();
   F_FLOAT norm_sqr = 0.0;
   MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   b_norm = sqrt(norm_sqr);
-  DeviceType::fence();
 
   // sig_new = parallel_dot( r, d, nn);
   F_FLOAT my_dot = 0.0;
   FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
   Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
-  DeviceType::fence();
   F_FLOAT dot_sqr = 0.0;
   MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   F_FLOAT sig_new = dot_sqr;
-  DeviceType::fence();
 
   int loop;
   const int loopmax = 200;
@@ -705,10 +678,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
     // sparse_matvec( &H, d, q );
     FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
     Kokkos::parallel_for(inum,sparse22_functor);
-    DeviceType::fence();
     if (neighflag != FULL) {
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
-      DeviceType::fence();
       if (neighflag == HALF) {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
@@ -719,7 +690,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
     } else {
       Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
     }
-    DeviceType::fence();
 
     if (neighflag != FULL) {
       k_o.template modify<DeviceType>();
@@ -733,10 +703,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
     Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
-    DeviceType::fence();
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     tmp = dot_sqr;
-    DeviceType::fence();
 
     alpha = sig_new / tmp;
 
@@ -747,12 +715,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosPrecon2Functor<DeviceType> precon2_functor(this);
     Kokkos::parallel_for(inum,precon2_functor);
-    DeviceType::fence();
     // preconditioning: p[j] = r[j] * Hdia_inv[j];
     // sig_new = parallel_dot( r, p, nn);
     FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
     Kokkos::parallel_reduce(inum,precon_functor,my_dot);
-    DeviceType::fence();
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     sig_new = dot_sqr;
 
@@ -761,7 +727,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
     // vector_sum( d, 1., p, beta, d, nn );
     FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
     Kokkos::parallel_for(inum,vecsum2_functor);
-    DeviceType::fence();
   }
 
   if (loop >= loopmax && comm->me == 0) {
@@ -786,7 +751,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
   sum = sum_all = 0.0;
   FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this);
   Kokkos::parallel_reduce(inum,vecacc1_functor,sum);
-  DeviceType::fence();
   MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
   const F_FLOAT s_sum = sum_all;
 
@@ -794,7 +758,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
   sum = sum_all = 0.0;
   FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this);
   Kokkos::parallel_reduce(inum,vecacc2_functor,sum);
-  DeviceType::fence();
   MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
   const F_FLOAT t_sum = sum_all;
 
@@ -804,7 +767,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
   // q[i] = s[i] - u * t[i];
   FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
   Kokkos::parallel_for(inum,calculateQ_functor);
-  DeviceType::fence();
 
   pack_flag = 4;
   //comm->forward_comm_fix( this ); //Dist_vector( atom->q );
diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp
index 27f7d100fa57f0a86ddc3c311f6f3b4a98cd6d5d..5e26ef3610535a74b9e24ba2fc9f9c9822589c04 100644
--- a/src/KOKKOS/fix_setforce_kokkos.cpp
+++ b/src/KOKKOS/fix_setforce_kokkos.cpp
@@ -108,7 +108,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
   if (varflag == CONSTANT) {
     copymode = 1;
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceConstant>(0,nlocal),*this,foriginal_kk);
-    DeviceType::fence();
     copymode = 0;
 
   // variable force, wrap with clear/add
@@ -138,7 +137,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag)
 
     copymode = 1;
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceNonConstant>(0,nlocal),*this,foriginal_kk);
-    DeviceType::fence();
     copymode = 0;
   }
 
diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
index 55be7e5cd7c5c86335a0a8208cbf8ca39005c520..cd7a2c59b78ae84c3c0ae58f34c3998705b442d1 100644
--- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
@@ -79,7 +79,6 @@ void FixWallReflectKokkos<DeviceType>::post_integrate()
 
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallReflectPostIntegrate>(0,nlocal),*this);
-    DeviceType::fence();
     copymode = 0;
   }
 
diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp
index 25bd2c732f6621278661e3fb891338a263992466..c2cb7dfe2bf55df1f64988abe60a152d42984654 100644
--- a/src/KOKKOS/improper_class2_kokkos.cpp
+++ b/src/KOKKOS/improper_class2_kokkos.cpp
@@ -140,7 +140,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2Compute<0,0> >(0,nimproperlist),*this);
     }
   }
-  DeviceType::fence();
   if (eflag_global) energy += ev.evdwl;
 
   // error check
@@ -165,7 +164,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2AngleAngle<0,0> >(0,nimproperlist),*this);
     }
   }
-  DeviceType::fence();
   if (eflag_global) energy += ev.evdwl;
 
   if (vflag_global) {
diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp
index 9c99b35bd9ec374953985eeec173dcea58d40c76..1e58e18c5189766dad20958c535f0eb123169950 100644
--- a/src/KOKKOS/improper_harmonic_kokkos.cpp
+++ b/src/KOKKOS/improper_harmonic_kokkos.cpp
@@ -128,7 +128,6 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this);
     }
   }
-  //DeviceType::fence();
 
   // error check
 
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
index 45c320bc516fcfd13816a6dfe809ec7296d035c5..76c701213d623303f354c3e8536ef85e121e951b 100644
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -409,7 +409,6 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i
   iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this);
-  DeviceType::fence();
   return n;
 }
 
@@ -428,7 +427,6 @@ void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first
   first = first_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this);
-  DeviceType::fence();
 }
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
index b9fa82740aeb0d59bba5e1c5d5d9f6e762292941..9b565f8edee1b7df915f220b78d2e932d353417c 100644
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -133,7 +133,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this);
-  DeviceType::fence();
 
   // loop over neighbors of my atoms
 
@@ -156,7 +155,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this);
       }
     }
-    DeviceType::fence();
 
     // communicate and sum densities (on the host)
 
@@ -174,7 +172,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this);
-    DeviceType::fence();
 
   } else if (neighflag == FULL) {
 
@@ -184,7 +181,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this);
-    DeviceType::fence();
   }
 
   if (eflag) {
@@ -239,7 +235,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -414,7 +409,6 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_
   iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm>(0,n),*this);
-  DeviceType::fence();
   return n;
 }
 
@@ -433,7 +427,6 @@ void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in
   first = first_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm>(0,n),*this);
-  DeviceType::fence();
 }
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp
index e4128de72265b609e9d86723d8acc9100e446346..7be8e54605fc0443d92eb64f32598b50ef2bf38e 100644
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@@ -409,7 +409,6 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d
   iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMPackForwardComm>(0,n),*this);
-  DeviceType::fence();
   return n;
 }
 
@@ -428,7 +427,6 @@ void PairEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in,
   first = first_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMUnpackForwardComm>(0,n),*this);
-  DeviceType::fence();
 }
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp
index 59369b5e082c3b383012670da52697f98e7a0163..6082c932874f314183370fbb9023b3d091b506e6 100644
--- a/src/KOKKOS/pair_reaxc_kokkos.cpp
+++ b/src/KOKKOS/pair_reaxc_kokkos.cpp
@@ -731,7 +731,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this);
   }
-  DeviceType::fence();
   ev_all += ev;
   pvector[13] = ev.ecoul;
 
@@ -771,7 +770,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this);
     }
   }
-  DeviceType::fence();
   ev_all += ev;
   pvector[10] = ev.evdwl;
   pvector[11] = ev.ecoul;
@@ -800,7 +798,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
     // zero
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this);
-    DeviceType::fence();
 
     if (neighflag == HALF)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this);
@@ -808,7 +805,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this);
     else //(neighflag == FULL)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this);
-    DeviceType::fence();
 
     k_resize_bo.modify<DeviceType>();
     k_resize_bo.sync<LMPHostType>();
@@ -827,15 +823,11 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // Bond order
   if (neighflag == HALF) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
-    DeviceType::fence();
   } else if (neighflag == HALFTHREAD) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this);
-    DeviceType::fence();
   }
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this);
-  DeviceType::fence();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this);
-  DeviceType::fence();
 
   // Bond energy
   if (neighflag == HALF) {
@@ -843,7 +835,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
     pvector[0] = ev.evdwl;
   } else { //if (neighflag == HALFTHREAD) {
@@ -851,7 +842,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
     pvector[0] = ev.evdwl;
   }
@@ -859,21 +849,17 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // Multi-body corrections
   if (neighflag == HALF) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else { //if (neighflag == HALFTHREAD) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
   pvector[2] = ev.ereax[0];
@@ -887,14 +873,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else { //if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
   pvector[4] = ev.ereax[3];
@@ -908,14 +892,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else { //if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
   pvector[8] = ev.ereax[6];
@@ -929,14 +911,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev);
       else
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this);
-      DeviceType::fence();
       ev_all += ev;
     } else { //if (neighflag == HALFTHREAD) {
       if (evflag)
         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev);
       else
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this);
-      DeviceType::fence();
       ev_all += ev;
     }
   }
@@ -946,22 +926,18 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // Bond force
   if (neighflag == HALF) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this);
-    DeviceType::fence();
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
     pvector[0] += ev.evdwl;
   } else { //if (neighflag == HALFTHREAD) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this);
-    DeviceType::fence();
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
     pvector[0] += ev.evdwl;
   }
@@ -3945,11 +3921,9 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag)
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this);
-    DeviceType::fence();
   }
   if (vflag_atom) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this);
-    DeviceType::fence();
   }
 
   // if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
@@ -4002,7 +3976,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
 {
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondZero>(0,nmax),*this);
-  DeviceType::fence();
 
   bo_cut_bond = control->bg_cut;
 
@@ -4017,7 +3990,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds)
   numbonds = 0;
   PairReaxCKokkosFindBondFunctor<DeviceType> find_bond_functor(this);
   Kokkos::parallel_reduce(inum,find_bond_functor,numbonds);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -4076,7 +4048,6 @@ void PairReaxCKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, int
   nlocal = atomKK->nlocal;
   PairReaxCKokkosPackBondBufferFunctor<DeviceType> pack_bond_buffer_functor(this);
   Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor);
-  DeviceType::fence();
   copymode = 0;
 
   k_buf.modify<DeviceType>();
@@ -4135,11 +4106,9 @@ void PairReaxCKokkos<DeviceType>::FindBondSpecies()
 {
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpeciesZero>(0,nmax),*this);
-  DeviceType::fence();
 
   nlocal = atomKK->nlocal;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpecies>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 
   // NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added
diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp
index 3ad7334d2f55c982e54e410357c7f3deb445d534..bd3ed3644f53373305a995382144a6d5c27a5432 100644
--- a/src/KOKKOS/pppm_kokkos.cpp
+++ b/src/KOKKOS/pppm_kokkos.cpp
@@ -403,17 +403,14 @@ void PPPMKokkos<DeviceType>::setup()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup1>(nxlo_fft,nxhi_fft+1),*this);
-  DeviceType::fence();
   copymode = 0;
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup2>(nylo_fft,nyhi_fft+1),*this);
-  DeviceType::fence();
   copymode = 0;
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup3>(nzlo_fft,nzhi_fft+1),*this);
-  DeviceType::fence();
   copymode = 0;
 
   // merge three outer loops into one for better threading
@@ -425,7 +422,6 @@ void PPPMKokkos<DeviceType>::setup()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup4>(0,inum_fft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   compute_gf_ik();
@@ -753,7 +749,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
     if (eflag_atom) {
       copymode = 1;
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self1>(0,nlocal),*this);
-      DeviceType::fence();
       copymode = 0;
       //for (i = nlocal; i < ntotal; i++) d_eatom[i] *= 0.5*qscale;
     }
@@ -761,7 +756,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
     if (vflag_atom) {
       copymode = 1;
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self2>(0,ntotal),*this);
-      DeviceType::fence();
       copymode = 0;
     }
   }
@@ -1415,7 +1409,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik>(0,inum_fft),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -1495,7 +1488,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik_triclinic()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik_triclinic>(nzlo_fft,nzhi_fft+1),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -1588,7 +1580,6 @@ void PPPMKokkos<DeviceType>::particle_map()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_particle_map>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_flag.template modify<DeviceType>();
@@ -1641,7 +1632,6 @@ void PPPMKokkos<DeviceType>::make_rho()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_zero>(0,inum_out),*this);
-  DeviceType::fence();
   copymode = 0;
 
   // loop over my charges, add their contribution to nearby grid points
@@ -1654,7 +1644,6 @@ void PPPMKokkos<DeviceType>::make_rho()
 #ifdef KOKKOS_HAVE_CUDA
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_atomic>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 #else
   ix = nxhi_out-nxlo_out + 1;
@@ -1663,7 +1652,6 @@ void PPPMKokkos<DeviceType>::make_rho()
   copymode = 1;
   Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1);
   Kokkos::parallel_for(config,*this);
-  DeviceType::fence();
   copymode = 0;
 #endif
 }
@@ -1794,7 +1782,6 @@ void PPPMKokkos<DeviceType>::brick2fft()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_brick2fft>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_density_fft.template modify<DeviceType>();
@@ -1842,7 +1829,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik1>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work1.template modify<DeviceType>();
@@ -1862,14 +1848,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
     if (vflag_global) {
       copymode = 1;
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev);
-      DeviceType::fence();
       copymode = 0;
       for (j = 0; j < 6; j++) virial[j] += ev.v[j];
       energy += ev.ecoul;
     } else {
       copymode = 1;
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev);
-      DeviceType::fence();
       copymode = 0;
       energy += ev.ecoul;
     }
@@ -1880,7 +1864,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik4>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   // extra FFTs for per-atomKK energy/virial
@@ -1914,7 +1897,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik5>(0,inum_fft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -1926,7 +1908,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik6>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 
@@ -1934,7 +1915,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik7>(0,inum_fft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -1946,14 +1926,12 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik8>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
   // z direction gradient
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik9>(0,inum_fft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -1965,7 +1943,6 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik10>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 }
@@ -2215,7 +2192,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
   if (eflag_atom) {
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom1>(0,nfft),*this);
-    DeviceType::fence();
     copymode = 0;
 
     k_work2.template modify<DeviceType>();
@@ -2227,7 +2203,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom2>(0,inum_inout),*this);
-    DeviceType::fence();
     copymode = 0;
 
   }
@@ -2238,7 +2213,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom3>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2250,13 +2224,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom4>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom5>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2268,13 +2240,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom6>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom7>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2286,12 +2256,10 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom8>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom9>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2303,13 +2271,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom10>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom11>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2321,13 +2287,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom12>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom13>(0,nfft),*this);
-  DeviceType::fence();
   copymode = 0;
 
   k_work2.template modify<DeviceType>();
@@ -2339,7 +2303,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom14>(0,inum_inout),*this);
-  DeviceType::fence();
   copymode = 0;
 
 }
@@ -2545,7 +2508,6 @@ void PPPMKokkos<DeviceType>::fieldforce_ik()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_ik>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -2606,7 +2568,6 @@ void PPPMKokkos<DeviceType>::fieldforce_peratom()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_peratom>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -2682,12 +2643,10 @@ void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, Kokkos::DualView<FFT_
   if (flag == FORWARD_IK) {
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward1>(0,nlist),*this);
-    DeviceType::fence();
     copymode = 0;
   } else if (flag == FORWARD_IK_PERATOM) {
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward2>(0,nlist),*this);
-    DeviceType::fence();
     copymode = 0;
   }
 }
@@ -2740,12 +2699,10 @@ void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FF
   if (flag == FORWARD_IK) {
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward1>(0,nlist),*this);
-    DeviceType::fence();
     copymode = 0;
   } else if (flag == FORWARD_IK_PERATOM) {
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward2>(0,nlist),*this);
-    DeviceType::fence();
     copymode = 0;
   }
 }
@@ -2798,7 +2755,6 @@ void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_reverse>(0,nlist),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -2829,7 +2785,6 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -2989,7 +2944,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
   double dipole = 0.0;
   copymode = 1;
   Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr1>(0,nlocal),*this,dipole);
-  DeviceType::fence();
   copymode = 0;
 
   // sum local contributions to get global dipole moment
@@ -3003,7 +2957,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
   if (eflag_atom || fabs(qsum) > SMALL) {
     copymode = 1;
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr2>(0,nlocal),*this,dipole_r2);
-    DeviceType::fence();
     copymode = 0;
 
     // sum local contributions
@@ -3027,7 +2980,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
     efact = qscale * MY_2PI/volume;
     copymode = 1;
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr3>(0,nlocal),*this);
-    DeviceType::fence();
     copymode = 0;
   }
 
@@ -3037,7 +2989,6 @@ void PPPMKokkos<DeviceType>::slabcorr()
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr4>(0,nlocal),*this);
-  DeviceType::fence();
   copymode = 0;
 }
 
@@ -3081,7 +3032,6 @@ int PPPMKokkos<DeviceType>::timing_1d(int n, double &time1d)
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
-  DeviceType::fence();
   copymode = 0;
 
   MPI_Barrier(world);
@@ -3119,7 +3069,6 @@ int PPPMKokkos<DeviceType>::timing_3d(int n, double &time3d)
 
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this);
-  DeviceType::fence();
   copymode = 0;
 
   MPI_Barrier(world);