diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp
index cb704627659e5bb7961c840086200118cf0407a9..816c9f3105808886d01e83bf2b46aa31905ab506 100755
--- a/src/KOKKOS/angle_charmm_kokkos.cpp
+++ b/src/KOKKOS/angle_charmm_kokkos.cpp
@@ -70,21 +70,20 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom) {
-    memory->destroy_kokkos(k_eatom,eatom);
-    memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
-    d_eatom = k_eatom.d_view;
+    if(k_eatom.dimension_0()<maxeatom) { // reallocate only when the current view is smaller than maxeatom
+      memory->destroy_kokkos(k_eatom,eatom);
+      memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); // label names this angle style, as the replaced code did
+      d_eatom = k_eatom.d_view;
+    } // NOTE(review): a reused view is no longer re-created each call; confirm it is zeroed elsewhere before tallying
   }
   if (vflag_atom) {
-    memory->destroy_kokkos(k_vatom,vatom);
-    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom");
-    d_vatom = k_vatom.d_view;
+    if(k_vatom.dimension_0()<maxvatom) { // reallocate only when the current view is smaller than maxvatom
+      memory->destroy_kokkos(k_vatom,vatom);
+      memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); // label names this angle style, as the replaced code did
+      d_vatom = k_vatom.d_view;
+    } // NOTE(review): a reused view is no longer re-created each call; confirm it is zeroed elsewhere before tallying
   }
 
-  atomKK->sync(execution_space,datamask_read);
-  k_k.template sync<DeviceType>();
-  k_theta0.template sync<DeviceType>();
-  k_k_ub.template sync<DeviceType>();
-  k_r_ub.template sync<DeviceType>();
   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
   else atomKK->modified(execution_space,F_MASK);
 
@@ -145,9 +144,6 @@ template<int NEWTON_BOND, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void AngleCharmmKokkos<DeviceType>::operator()(TagAngleCharmmCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
 
-  // The f array is atomic
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
-
   const int i1 = anglelist(n,0);
   const int i2 = anglelist(n,1);
   const int i3 = anglelist(n,2);
@@ -227,21 +223,21 @@ void AngleCharmmKokkos<DeviceType>::operator()(TagAngleCharmmCompute<NEWTON_BOND
   // apply force to each of 3 atoms
 
   if (NEWTON_BOND || i1 < nlocal) {
-    a_f(i1,0) += f1[0];
-    a_f(i1,1) += f1[1];
-    a_f(i1,2) += f1[2];
+    f(i1,0) += f1[0];
+    f(i1,1) += f1[1];
+    f(i1,2) += f1[2];
   }
 
   if (NEWTON_BOND || i2 < nlocal) {
-    a_f(i2,0) -= f1[0] + f3[0];
-    a_f(i2,1) -= f1[1] + f3[1];
-    a_f(i2,2) -= f1[2] + f3[2];
+    f(i2,0) -= f1[0] + f3[0];
+    f(i2,1) -= f1[1] + f3[1];
+    f(i2,2) -= f1[2] + f3[2];
   }
 
   if (NEWTON_BOND || i3 < nlocal) {
-    a_f(i3,0) += f3[0];
-    a_f(i3,1) += f3[1];
-    a_f(i3,2) += f3[2];
+    f(i3,0) += f3[0];
+    f(i3,1) += f3[1];
+    f(i3,2) += f3[2];
   }
 
   if (EVFLAG) ev_tally(ev,i1,i2,i3,eangle,f1,f3,
@@ -262,17 +258,6 @@ template<class DeviceType>
 void AngleCharmmKokkos<DeviceType>::allocate()
 {
   AngleCharmm::allocate();
-
-  int n = atom->nangletypes;
-  k_k = DAT::tdual_ffloat_1d("AngleCharmm::k",n+1);
-  k_theta0 = DAT::tdual_ffloat_1d("AngleCharmm::theta0",n+1);
-  k_k_ub = DAT::tdual_ffloat_1d("AngleCharmm::k_ub",n+1);
-  k_r_ub = DAT::tdual_ffloat_1d("AngleCharmm::r_ub",n+1);
-
-  d_k = k_k.d_view;
-  d_theta0 = k_theta0.d_view;
-  d_k_ub = k_k_ub.d_view;
-  d_r_ub = k_r_ub.d_view;
 }
 
 /* ----------------------------------------------------------------------
@@ -285,6 +270,16 @@ void AngleCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
   AngleCharmm::coeff(narg, arg);
 
   int n = atom->nangletypes;
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_k("AngleCharmm::k",n+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_theta0("AngleCharmm::theta0",n+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_k_ub("AngleCharmm::k_ub",n+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_r_ub("AngleCharmm::r_ub",n+1);
+
+  d_k = k_k.d_view;
+  d_theta0 = k_theta0.d_view;
+  d_k_ub = k_k_ub.d_view;
+  d_r_ub = k_r_ub.d_view;
+
   for (int i = 1; i <= n; i++) {
     k_k.h_view[i] = k[i];
     k_theta0.h_view[i] = theta0[i];
@@ -296,6 +291,12 @@ void AngleCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
   k_theta0.template modify<LMPHostType>();
   k_k_ub.template modify<LMPHostType>();
   k_r_ub.template modify<LMPHostType>();
+
+  k_k.template sync<DeviceType>();
+  k_theta0.template sync<DeviceType>();
+  k_k_ub.template sync<DeviceType>();
+  k_r_ub.template sync<DeviceType>();
+
 }
 
 /* ----------------------------------------------------------------------
@@ -314,10 +315,6 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in
   E_FLOAT eanglethird;
   F_FLOAT v[6];
 
-  // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
-
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) ev.evdwl += eangle;
@@ -332,9 +329,9 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in
     if (eflag_atom) {
       eanglethird = THIRD*eangle;
 
-      if (newton_bond || i < nlocal) v_eatom[i] += eanglethird;
-      if (newton_bond || j < nlocal) v_eatom[j] += eanglethird;
-      if (newton_bond || k < nlocal) v_eatom[k] += eanglethird;
+      if (newton_bond || i < nlocal) d_eatom[i] += eanglethird;
+      if (newton_bond || j < nlocal) d_eatom[j] += eanglethird;
+      if (newton_bond || k < nlocal) d_eatom[k] += eanglethird;
     }
   }
 
@@ -385,28 +382,28 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in
 
     if (vflag_atom) {
       if (newton_bond || i < nlocal) {
-        v_vatom(i,0) += THIRD*v[0];
-        v_vatom(i,1) += THIRD*v[1];
-        v_vatom(i,2) += THIRD*v[2];
-        v_vatom(i,3) += THIRD*v[3];
-        v_vatom(i,4) += THIRD*v[4];
-        v_vatom(i,5) += THIRD*v[5];
+        d_vatom(i,0) += THIRD*v[0];
+        d_vatom(i,1) += THIRD*v[1];
+        d_vatom(i,2) += THIRD*v[2];
+        d_vatom(i,3) += THIRD*v[3];
+        d_vatom(i,4) += THIRD*v[4];
+        d_vatom(i,5) += THIRD*v[5];
       }
       if (newton_bond || j < nlocal) {
-        v_vatom(j,0) += THIRD*v[0];
-        v_vatom(j,1) += THIRD*v[1];
-        v_vatom(j,2) += THIRD*v[2];
-        v_vatom(j,3) += THIRD*v[3];
-        v_vatom(j,4) += THIRD*v[4];
-        v_vatom(j,5) += THIRD*v[5];
+        d_vatom(j,0) += THIRD*v[0];
+        d_vatom(j,1) += THIRD*v[1];
+        d_vatom(j,2) += THIRD*v[2];
+        d_vatom(j,3) += THIRD*v[3];
+        d_vatom(j,4) += THIRD*v[4];
+        d_vatom(j,5) += THIRD*v[5];
       }
       if (newton_bond || k < nlocal) {
-        v_vatom(k,0) += THIRD*v[0];
-        v_vatom(k,1) += THIRD*v[1];
-        v_vatom(k,2) += THIRD*v[2];
-        v_vatom(k,3) += THIRD*v[3];
-        v_vatom(k,4) += THIRD*v[4];
-        v_vatom(k,5) += THIRD*v[5];
+        d_vatom(k,0) += THIRD*v[0];
+        d_vatom(k,1) += THIRD*v[1];
+        d_vatom(k,2) += THIRD*v[2];
+        d_vatom(k,3) += THIRD*v[3];
+        d_vatom(k,4) += THIRD*v[4];
+        d_vatom(k,5) += THIRD*v[5];
 
       }
     }
@@ -415,7 +412,10 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS { // the explicit template instantiations below are emitted inside namespace LAMMPS_NS
 template class AngleCharmmKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class AngleCharmmKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+} // namespace LAMMPS_NS
+
diff --git a/src/KOKKOS/angle_charmm_kokkos.h b/src/KOKKOS/angle_charmm_kokkos.h
index 3ff623044b501605f499af074e2eafff686405af..18eeda74b58f7b106e5d8769038e4b449f1d40b5 100755
--- a/src/KOKKOS/angle_charmm_kokkos.h
+++ b/src/KOKKOS/angle_charmm_kokkos.h
@@ -60,27 +60,23 @@ class AngleCharmmKokkos : public AngleCharmm {
 
   class NeighborKokkos *neighborKK;
 
-  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
-  typename ArrayTypes<DeviceType>::t_f_array f;
-  typename ArrayTypes<DeviceType>::t_int_2d anglelist;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread x;
+  typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
+  typename AT::t_int_2d anglelist;
 
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
-  DAT::t_efloat_1d d_eatom;
-  DAT::t_virial_array d_vatom;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;
 
   int nlocal,newton_bond;
   int eflag,vflag;
 
-  DAT::tdual_ffloat_1d k_k;
-  DAT::tdual_ffloat_1d k_theta0;
-  DAT::tdual_ffloat_1d k_k_ub;
-  DAT::tdual_ffloat_1d k_r_ub;
-
-  DAT::t_ffloat_1d d_k;
-  DAT::t_ffloat_1d d_theta0;
-  DAT::t_ffloat_1d d_k_ub;
-  DAT::t_ffloat_1d d_r_ub;
+  typename AT::t_ffloat_1d d_k;
+  typename AT::t_ffloat_1d d_theta0;
+  typename AT::t_ffloat_1d d_k_ub;
+  typename AT::t_ffloat_1d d_r_ub;
 
   virtual void allocate();
 };
diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp
index c5976f1bff680bc56787b68b06b7248f4ece7fb7..0d3110f9049c61f3f99b8213bfda590107f35f86 100755
--- a/src/KOKKOS/angle_harmonic_kokkos.cpp
+++ b/src/KOKKOS/angle_harmonic_kokkos.cpp
@@ -72,24 +72,24 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   if (eflag_atom) {
     memory->destroy_kokkos(k_eatom,eatom);
     memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
-    d_eatom = k_eatom.d_view;
+    d_eatom = k_eatom.template view<DeviceType>();
   }
   if (vflag_atom) {
     memory->destroy_kokkos(k_vatom,vatom);
     memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom");
-    d_vatom = k_vatom.d_view;
+    d_vatom = k_vatom.template view<DeviceType>();
   }
 
-  atomKK->sync(execution_space,datamask_read);
+  //atomKK->sync(execution_space,datamask_read);
   k_k.template sync<DeviceType>();
   k_theta0.template sync<DeviceType>();
-  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
-  else atomKK->modified(execution_space,F_MASK);
+  //  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  //  else atomKK->modified(execution_space,F_MASK);
 
-  x = atomKK->k_x.view<DeviceType>();
-  f = atomKK->k_f.view<DeviceType>();
+  x = atomKK->k_x.template view<DeviceType>();
+  f = atomKK->k_f.template view<DeviceType>();
   neighborKK->k_anglelist.template sync<DeviceType>();
-  anglelist = neighborKK->k_anglelist.view<DeviceType>();
+  anglelist = neighborKK->k_anglelist.template view<DeviceType>();
   int nanglelist = neighborKK->nanglelist;
   nlocal = atom->nlocal;
   newton_bond = force->newton_bond;
@@ -113,7 +113,6 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleHarmonicCompute<0,0> >(0,nanglelist),*this);
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) energy += ev.evdwl;
   if (vflag_global) {
@@ -242,11 +241,11 @@ void AngleHarmonicKokkos<DeviceType>::allocate()
   AngleHarmonic::allocate();
 
   int n = atom->nangletypes;
-  k_k = DAT::tdual_ffloat_1d("AngleHarmonic::k",n+1);
-  k_theta0 = DAT::tdual_ffloat_1d("AngleHarmonic::theta0",n+1);
+  k_k = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleHarmonic::k",n+1);
+  k_theta0 = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleHarmonic::theta0",n+1);
 
-  d_k = k_k.d_view;
-  d_theta0 = k_theta0.d_view;
+  d_k = k_k.template view<DeviceType>();
+  d_theta0 = k_theta0.template view<DeviceType>();
 }
 
 /* ----------------------------------------------------------------------
@@ -285,8 +284,8 @@ void AngleHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const
   F_FLOAT v[6];
 
   // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.template view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.template view<DeviceType>();
 
   if (eflag_either) {
     if (eflag_global) {
@@ -385,7 +384,10 @@ void AngleHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS { // the explicit template instantiations below are emitted inside namespace LAMMPS_NS
 template class AngleHarmonicKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class AngleHarmonicKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+} // namespace LAMMPS_NS
+
diff --git a/src/KOKKOS/angle_harmonic_kokkos.h b/src/KOKKOS/angle_harmonic_kokkos.h
index 0fe906fbb7fe0e416cbae582cb356825c4c04e77..8e98cd6b7464d411a0f0d768b15ae392e2faa452 100755
--- a/src/KOKKOS/angle_harmonic_kokkos.h
+++ b/src/KOKKOS/angle_harmonic_kokkos.h
@@ -65,19 +65,19 @@ class AngleHarmonicKokkos : public AngleHarmonic {
   typename ArrayTypes<DeviceType>::t_f_array f;
   typename ArrayTypes<DeviceType>::t_int_2d anglelist;
 
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
-  DAT::t_efloat_1d d_eatom;
-  DAT::t_virial_array d_vatom;
+  typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;
+  typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
 
   int nlocal,newton_bond;
   int eflag,vflag;
 
-  DAT::tdual_ffloat_1d k_k;
-  DAT::tdual_ffloat_1d k_theta0;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_1d k_k;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_1d k_theta0;
 
-  DAT::t_ffloat_1d d_k;
-  DAT::t_ffloat_1d d_theta0;
+  typename ArrayTypes<DeviceType>::t_ffloat_1d d_k;
+  typename ArrayTypes<DeviceType>::t_ffloat_1d d_theta0;
 
   virtual void allocate();
 };
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index 39cc4beebcb0adfa902b27b8564664ea7dabbee2..0e2e6038c1a873dac7e9cb0ba648002211188597 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -88,6 +88,10 @@ void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
   ((AtomVecKokkos *) avec)->modified(space,mask);
 }
 
+void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask)
+{ // thin wrapper: passes space and mask unchanged to the atom-vec object
+  ((AtomVecKokkos *) avec)->sync_overlapping_device(space,mask); // avec is cast to AtomVecKokkos for the call
+}
 /* ---------------------------------------------------------------------- */
 
 void AtomKokkos::allocate_type_arrays()
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index 9bcd07aa4fe45f35f7f9b8ecd9358cf7ed0979e8..05aae712d9cbebccfcff1a67c65fa10d7b751a0a 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -57,6 +57,7 @@ class AtomKokkos : public Atom {
   virtual void allocate_type_arrays();
   void sync(const ExecutionSpace space, unsigned int mask);
   void modified(const ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(const ExecutionSpace space, unsigned int mask);
   virtual void sort();
   virtual void grow(unsigned int mask);
   virtual void deallocate_topology();
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp
index f07e4a9e66bfe46a37f29064cc1b806504ae32c0..6d063793e01407f7fb07b8577d0f67677fcb30dc 100644
--- a/src/KOKKOS/atom_vec_angle_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp
@@ -48,6 +48,7 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
   k_count = DAT::tdual_int_1d("atom::k_count",1);
   atomKK = (AtomKokkos *) atom;
   commKK = (CommKokkos *) comm;
+  buffer = NULL;
 }
 
 /* ----------------------------------------------------------------------
@@ -1021,6 +1022,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1830,6 +1832,97 @@ void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask)
   }
 }
 
+void AtomVecAngleKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{ // for every field selected by mask whose dual view reports need_sync, calls perform_async_copy (defined elsewhere)
+  if (space == Device) { // Device branch: need_sync is queried with LMPDeviceType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) { // SPECIAL_MASK selects k_nspecial and k_special, each checked separately
+      if (atomKK->k_nspecial.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) { // BOND_MASK selects k_num_bond, k_bond_type and k_bond_atom
+      if (atomKK->k_num_bond.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) { // ANGLE_MASK selects k_num_angle, k_angle_type and k_angle_atom1..3
+      if (atomKK->k_num_angle.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+  } else { // any other space: same field list, need_sync is queried with LMPHostType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) { // SPECIAL_MASK selects k_nspecial and k_special, each checked separately
+      if (atomKK->k_nspecial.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) { // BOND_MASK selects k_num_bond, k_bond_type and k_bond_atom
+      if (atomKK->k_num_bond.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) { // ANGLE_MASK selects k_num_angle, k_angle_type and k_angle_atom1..3
+      if (atomKK->k_num_angle.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+  }
+}
+
 /* ---------------------------------------------------------------------- */
 
 void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask)
@@ -1886,3 +1979,4 @@ void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask)
     }
   }
 }
+
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h
index 39874780f13add45909f7f761a06c439bb8c86a4..0a477b03866560f7b9175b8342b25f28c9cb814e 100644
--- a/src/KOKKOS/atom_vec_angle_kokkos.h
+++ b/src/KOKKOS/atom_vec_angle_kokkos.h
@@ -83,6 +83,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
 
  protected:
 
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
index 4b50b9b9fd514a5ae66aac84d779a4ac8a3dc9fe..21170730773a82c15adff3a8d5a92cf7d4066fc5 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -896,6 +896,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1377,6 +1378,43 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask)
 
 /* ---------------------------------------------------------------------- */
 
+void AtomVecAtomicKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{ // for every field selected by mask whose dual view reports need_sync, calls perform_async_copy (defined elsewhere)
+  if (space == Device) { // Device branch: need_sync is queried with LMPDeviceType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+  } else { // any other space: same field list, need_sync is queried with LMPHostType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask)
 {
   if (space == Device) {
@@ -1397,3 +1435,4 @@ void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask)
     if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
   }
 }
+
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h
index 6e73f4ee3c706a1b1d5e3b62c39c5e5c96a55ede..0c3e24f9d9661303a51db514eb13f5ce416f9375 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.h
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.h
@@ -79,6 +79,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
 
  protected:
   tagint *tag;
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp
index 65ac756956509dcb7222f5250e39f947a818b31d..d42ec04834ac7f3e50a9cd967dcdf708654b2eb4 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@@ -970,6 +970,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1673,6 +1674,75 @@ void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask)
 
 /* ---------------------------------------------------------------------- */
 
+void AtomVecBondKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{ // for every field selected by mask whose dual view reports need_sync, calls perform_async_copy (defined elsewhere)
+  if (space == Device) { // Device branch: need_sync is queried with LMPDeviceType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) { // SPECIAL_MASK selects k_nspecial and k_special, each checked separately
+      if (atomKK->k_nspecial.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) { // BOND_MASK selects k_num_bond, k_bond_type and k_bond_atom
+      if (atomKK->k_num_bond.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+  } else { // any other space: same field list, need_sync is queried with LMPHostType
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) { // SPECIAL_MASK selects k_nspecial and k_special, each checked separately
+      if (atomKK->k_nspecial.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) { // BOND_MASK selects k_num_bond, k_bond_type and k_bond_atom
+      if (atomKK->k_num_bond.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask)
 {
   if (space == Device) {
@@ -1713,3 +1783,4 @@ void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask)
     }
   }
 }
+
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h
index 1ff9d8fa4510a940b764bee2c4e31cfb7e5c08c2..e64017c99be6f8deb6a2055aeb611e310e4d9c0b 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.h
+++ b/src/KOKKOS/atom_vec_bond_kokkos.h
@@ -83,6 +83,7 @@ class AtomVecBondKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);  // per-mask sync using perform_async_copy()
 
  protected:
 
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
index 87e9ac1cf4de58073d184a735eab23000e735d6c..4669dd8d7c8902975337c6978b98dc7565e92942 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -939,6 +939,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1330,11 +1331,10 @@ void AtomVecChargeKokkos::create_atom(int itype, double *coord)
 {
   int nlocal = atom->nlocal;
   if (nlocal == nmax) {
-    //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]);
     atomKK->modified(Host,ALL_MASK);
     grow(0);
-    //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]);
   }
+  atomKK->sync(Host,ALL_MASK);
   atomKK->modified(Host,ALL_MASK);
 
   tag[nlocal] = 0;
@@ -1520,3 +1520,51 @@ void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask)
     if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();
   }
 }
+
+/* ----------------------------------------------------------------------
+   for every per-atom array selected by mask that reports need_sync()
+   for the requested space, refresh it with perform_async_copy();
+   arrays not listed here are left untouched
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    // device requested: test each array with need_sync<LMPDeviceType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+  } else {
+    // any other space value: test each array with need_sync<LMPHostType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+  }
+}
+
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h
index 46a05371aa11f2b16fe92ea56b3b113eac31bf48..38e32458c685d0ca5798f180dd177ed1cd2e9675 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.h
+++ b/src/KOKKOS/atom_vec_charge_kokkos.h
@@ -84,6 +84,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);  // per-mask sync using perform_async_copy()
 
  protected:
   tagint *tag;
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
index d41a88bd88f95954996fdf3421c73daede34a635..7c1ebeed3a1ca6be2666a57642924c6f7f111363 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -1110,10 +1110,9 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
 
   m = 0;
   last = first + n;
-  if(n > 0)
-    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1140,7 +1139,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
-    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -1879,7 +1878,9 @@ int AtomVecFullKokkos::unpack_restart(double *buf)
     if (atom->nextra_store)
       memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
   }
-
+  sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+       MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
+       ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
   modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
            MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
            ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
@@ -1955,6 +1956,7 @@ void AtomVecFullKokkos::create_atom(int itype, double *coord)
     atomKK->modified(Host,ALL_MASK);
     grow(0);
   }
+  atomKK->sync(Host,ALL_MASK);
   atomKK->modified(Host,ALL_MASK);
 
   tag[nlocal] = 0;
@@ -2246,6 +2248,165 @@ void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask)
 
 /* ---------------------------------------------------------------------- */
 
+// For every per-atom array selected by mask, call perform_async_copy() when
+// the array reports need_sync() for the requested space (Device, otherwise
+// the host side).  Both branches cover the same arrays, including all four
+// dihedral and improper atom lists.
+void AtomVecFullKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    // device requested: test each array with need_sync<LMPDeviceType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) {
+      if (atomKK->k_nspecial.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) {
+      if (atomKK->k_num_bond.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) {
+      if (atomKK->k_num_angle.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+    if (mask & DIHEDRAL_MASK) {
+      if (atomKK->k_num_dihedral.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space);
+      if (atomKK->k_dihedral_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space);
+      if (atomKK->k_dihedral_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space);
+      if (atomKK->k_dihedral_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space);
+      if (atomKK->k_dihedral_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space);
+      if (atomKK->k_dihedral_atom4.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space);
+    }
+    if (mask & IMPROPER_MASK) {
+      if (atomKK->k_num_improper.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space);
+      if (atomKK->k_improper_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space);
+      if (atomKK->k_improper_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space);
+      if (atomKK->k_improper_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space);
+      if (atomKK->k_improper_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space);
+      if (atomKK->k_improper_atom4.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space);
+    }
+  } else {
+    // any other space value: test each array with need_sync<LMPHostType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) {
+      if (atomKK->k_nspecial.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) {
+      if (atomKK->k_num_bond.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) {
+      if (atomKK->k_num_angle.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+    if (mask & DIHEDRAL_MASK) {
+      if (atomKK->k_num_dihedral.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space);
+      if (atomKK->k_dihedral_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space);
+      if (atomKK->k_dihedral_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space);
+      if (atomKK->k_dihedral_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space);
+      if (atomKK->k_dihedral_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space);
+      if (atomKK->k_dihedral_atom4.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space);
+    }
+    if (mask & IMPROPER_MASK) {
+      if (atomKK->k_num_improper.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space);
+      if (atomKK->k_improper_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space);
+      if (atomKK->k_improper_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space);
+      if (atomKK->k_improper_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space);
+      if (atomKK->k_improper_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space);
+      if (atomKK->k_improper_atom4.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask)
 {
   if (space == Device) {
@@ -2334,3 +2495,4 @@ void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask)
     }
   }
 }
+
diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h
index b66d6b70f07cfb6dcf52c51d7d3ec9a4a19ac553..841707b33888e081ac9fcebef45dec1734c8ff50 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.h
+++ b/src/KOKKOS/atom_vec_full_kokkos.h
@@ -83,6 +83,7 @@ class AtomVecFullKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);  // per-mask sync using perform_async_copy()
 
  protected:
 
diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
index 1d9174196a79583a4f4087446c8555d41403a348..5542991395157470204cf82b701c92c3908368d0 100644
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -20,4 +20,7 @@ using namespace LAMMPS_NS;
 AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
 {
   kokkosable = 1;
+  buffer = NULL;      // no buffer allocated at construction
+  buffer_size = 0;    // recorded size starts at zero
 }
+
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index b10c4a21565d46bed04664cc8a9ca6a4e9099e68..fbeeaf96be3a6415e62901a2edbfb3231c331952 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -16,6 +16,7 @@
 
 #include "atom_vec.h"
 #include "kokkos_type.h"
+#include <type_traits>
 
 namespace LAMMPS_NS {
 
@@ -26,6 +27,7 @@ class AtomVecKokkos : public AtomVec {
 
   virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
   virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
+  virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {};  // default does nothing; not pure virtual
 
   virtual int
     pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
@@ -65,6 +67,95 @@ class AtomVecKokkos : public AtomVec {
  protected:
 
   class CommKokkos *commKK;
+  // staging buffer shared by the copy helpers below; buffer_size is its
+  // current size in bytes
+  // NOTE(review): no release of buffer is visible in this hunk - confirm the
+  // destructor frees it
+  size_t buffer_size;
+  void* buffer;
+
+  #ifdef KOKKOS_HAVE_CUDA
+  // Return an unmanaged view with the extents of src that aliases the
+  // staging buffer; the buffer is grown first when src does not fit.
+  template<class ViewType>
+  Kokkos::View<typename ViewType::data_type,
+               typename ViewType::array_layout,
+               Kokkos::CudaHostPinnedSpace,
+               Kokkos::MemoryTraits<Kokkos::Unmanaged> >
+  create_async_copy(const ViewType& src) {
+    typedef Kokkos::View<typename ViewType::data_type,
+                 typename ViewType::array_layout,
+                 typename std::conditional<
+                   std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
+                   Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
+                 Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
+    // track the request in bytes so differently typed views can share
+    // the buffer and it is only reallocated when it has to grow
+    const size_t nbytes = src.capacity()*sizeof(typename ViewType::value_type);
+    if(buffer_size < nbytes) {
+      buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,nbytes);
+      buffer_size = nbytes;
+    }
+    return mirror_type( (typename ViewType::value_type*)buffer ,
+                             src.dimension_0() ,
+                             src.dimension_1() ,
+                             src.dimension_2() ,
+                             src.dimension_3() ,
+                             src.dimension_4() ,
+                             src.dimension_5() ,
+                             src.dimension_6() ,
+                             src.dimension_7() );
+  }
+
+  // Copy src between its h_view and d_view through the staging buffer in
+  // the direction given by space, then set modified_device equal to
+  // modified_host.
+  // NOTE(review): the buffer is reused by every call - confirm each pair of
+  // deep_copy calls has completed before the next call overwrites it.
+  template<class ViewType>
+  void perform_async_copy(const ViewType& src, unsigned int space) {
+    typedef Kokkos::View<typename ViewType::data_type,
+                 typename ViewType::array_layout,
+                 typename std::conditional<
+                   std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
+                   Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
+                 Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
+    // grow-only: remember the allocated byte count after each reallocation
+    const size_t nbytes = src.capacity()*sizeof(typename ViewType::value_type);
+    if(buffer_size < nbytes) {
+      buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,nbytes);
+      buffer_size = nbytes;
+    }
+    mirror_type tmp_view( (typename ViewType::value_type*)buffer ,
+                             src.dimension_0() ,
+                             src.dimension_1() ,
+                             src.dimension_2() ,
+                             src.dimension_3() ,
+                             src.dimension_4() ,
+                             src.dimension_5() ,
+                             src.dimension_6() ,
+                             src.dimension_7() );
+    if(space == Device) {
+      // host view -> staging buffer -> device view
+      Kokkos::deep_copy(LMPHostType(),tmp_view,src.h_view);
+      Kokkos::deep_copy(LMPHostType(),src.d_view,tmp_view);
+      src.modified_device() = src.modified_host();
+    } else {
+      // device view -> staging buffer -> host view
+      Kokkos::deep_copy(LMPHostType(),tmp_view,src.d_view);
+      Kokkos::deep_copy(LMPHostType(),src.h_view,tmp_view);
+      src.modified_device() = src.modified_host();
+    }
+  }
+  #else
+  // Without KOKKOS_HAVE_CUDA no staging buffer is used: delegate to the
+  // view's own sync<>() for the requested space.
+  template<class ViewType>
+  void perform_async_copy(ViewType& src, unsigned int space) {
+    if(space == Device)
+      src.template sync<LMPDeviceType>();
+    else
+      src.template sync<LMPHostType>();
+  }
+  #endif
 };
 
 }
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
index 848e146ac83e51b979b20e3c161fed732dcac244..7217ad49cbc4559e0cb1f591590a22a5d88c3bc3 100644
--- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
@@ -1094,6 +1094,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf)
   last = first + n;
   for (i = first; i < last; i++) {
     if (i == nmax) grow(0);
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
@@ -2149,6 +2150,161 @@ void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask)
   }
 }
 
+/* ----------------------------------------------------------------------
+   for every per-atom array selected by mask that reports need_sync()
+   for the requested space, refresh it with perform_async_copy();
+   both branches cover the same arrays, including dihedral_atom4
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    // device requested: test each array with need_sync<LMPDeviceType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) {
+      if (atomKK->k_nspecial.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) {
+      if (atomKK->k_num_bond.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) {
+      if (atomKK->k_num_angle.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+    if (mask & DIHEDRAL_MASK) {
+      if (atomKK->k_num_dihedral.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space);
+      if (atomKK->k_dihedral_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space);
+      if (atomKK->k_dihedral_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space);
+      if (atomKK->k_dihedral_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space);
+      if (atomKK->k_dihedral_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space);
+      if (atomKK->k_dihedral_atom4.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space);
+    }
+    if (mask & IMPROPER_MASK) {
+      if (atomKK->k_num_improper.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space);
+      if (atomKK->k_improper_type.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space);
+      if (atomKK->k_improper_atom1.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space);
+      if (atomKK->k_improper_atom2.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space);
+      if (atomKK->k_improper_atom3.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space);
+      if (atomKK->k_improper_atom4.need_sync<LMPDeviceType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space);
+    }
+  } else {
+    // any other space value: test each array with need_sync<LMPHostType>()
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if (mask & SPECIAL_MASK) {
+      if (atomKK->k_nspecial.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space);
+      if (atomKK->k_special.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space);
+    }
+    if (mask & BOND_MASK) {
+      if (atomKK->k_num_bond.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space);
+      if (atomKK->k_bond_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space);
+      if (atomKK->k_bond_atom.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space);
+    }
+    if (mask & ANGLE_MASK) {
+      if (atomKK->k_num_angle.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space);
+      if (atomKK->k_angle_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space);
+      if (atomKK->k_angle_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space);
+      if (atomKK->k_angle_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space);
+      if (atomKK->k_angle_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space);
+    }
+    if (mask & DIHEDRAL_MASK) {
+      if (atomKK->k_num_dihedral.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space);
+      if (atomKK->k_dihedral_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space);
+      if (atomKK->k_dihedral_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space);
+      if (atomKK->k_dihedral_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space);
+      if (atomKK->k_dihedral_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space);
+      if (atomKK->k_dihedral_atom4.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space);
+    }
+    if (mask & IMPROPER_MASK) {
+      if (atomKK->k_num_improper.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space);
+      if (atomKK->k_improper_type.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space);
+      if (atomKK->k_improper_atom1.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space);
+      if (atomKK->k_improper_atom2.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space);
+      if (atomKK->k_improper_atom3.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space);
+      if (atomKK->k_improper_atom4.need_sync<LMPHostType>())
+        perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space);
+    }
+  }
+}
+
 /* ---------------------------------------------------------------------- */
 
 void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask)
@@ -2237,3 +2393,4 @@ void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask)
     }
   }
 }
+
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h
index 9d867006573779bc2e1e9a68f3239894e82e74ee..4ec26621cc8397f2a565027b564d128df25f25d3 100644
--- a/src/KOKKOS/atom_vec_molecular_kokkos.h
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.h
@@ -83,6 +83,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos {
 
   void sync(ExecutionSpace space, unsigned int mask);
   void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);  // per-mask sync using perform_async_copy()
 
  protected:
 
diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp
index c8b643261a3146444a6f31f5ec582e80f2ed556e..7ff2e568d1c4c917c24fe050740788f22b008e7c 100755
--- a/src/KOKKOS/bond_fene_kokkos.cpp
+++ b/src/KOKKOS/bond_fene_kokkos.cpp
@@ -379,7 +379,10 @@ void BondFENEKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class BondFENEKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class BondFENEKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}  // namespace LAMMPS_NS
+
diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp
index ba2ab19a0433f3204add21f802aab59f54a120c3..9b34786ec07be79892c8db776b6b9e9178900d72 100755
--- a/src/KOKKOS/bond_harmonic_kokkos.cpp
+++ b/src/KOKKOS/bond_harmonic_kokkos.cpp
@@ -67,26 +67,27 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom) {
-    memory->destroy_kokkos(k_eatom,eatom);
-    memory->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom");
-    d_eatom = k_eatom.d_view;
+    if(k_eatom.dimension_0()<maxeatom) {
+      memory->destroy_kokkos(k_eatom,eatom);
+      memory->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom");
+      d_eatom = k_eatom.d_view;
+    }
   }
   if (vflag_atom) {
-    memory->destroy_kokkos(k_vatom,vatom);
-    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom");
-    d_vatom = k_vatom.d_view;
+    if(k_vatom.dimension_0()<maxvatom) {
+      memory->destroy_kokkos(k_vatom,vatom);
+      memory->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom");
+      d_vatom = k_vatom.d_view;
+    }
   }
 
-  atomKK->sync(execution_space,datamask_read);
-  k_k.template sync<DeviceType>();
-  k_r0.template sync<DeviceType>();
-  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
-  else atomKK->modified(execution_space,F_MASK);
+//  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+//  else atomKK->modified(execution_space,F_MASK);
 
-  x = atomKK->k_x.view<DeviceType>();
-  f = atomKK->k_f.view<DeviceType>();
+  x = atomKK->k_x.template view<DeviceType>();
+  f = atomKK->k_f.template view<DeviceType>();
   neighborKK->k_bondlist.template sync<DeviceType>();
-  bondlist = neighborKK->k_bondlist.view<DeviceType>();
+  bondlist = neighborKK->k_bondlist.template view<DeviceType>();
   int nbondlist = neighborKK->nbondlist;
   nlocal = atom->nlocal;
   newton_bond = force->newton_bond;
@@ -110,7 +111,7 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this);
     }
   }
-  DeviceType::fence();
+  //DeviceType::fence();
 
   if (eflag_global) energy += ev.evdwl;
   if (vflag_global) {
@@ -140,9 +141,6 @@ template<int NEWTON_BOND, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void BondHarmonicKokkos<DeviceType>::operator()(TagBondHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
 
-  // The f array is atomic
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
-
   const int i1 = bondlist(n,0);
   const int i2 = bondlist(n,1);
   const int type = bondlist(n,2);
@@ -168,15 +166,15 @@ void BondHarmonicKokkos<DeviceType>::operator()(TagBondHarmonicCompute<NEWTON_BO
   // apply force to each of 2 atoms
 
   if (NEWTON_BOND || i1 < nlocal) {
-    a_f(i1,0) += delx*fbond;
-    a_f(i1,1) += dely*fbond;
-    a_f(i1,2) += delz*fbond;
+    f(i1,0) += delx*fbond;
+    f(i1,1) += dely*fbond;
+    f(i1,2) += delz*fbond;
   }
 
   if (NEWTON_BOND || i2 < nlocal) {
-    a_f(i2,0) -= delx*fbond;
-    a_f(i2,1) -= dely*fbond;
-    a_f(i2,2) -= delz*fbond;
+    f(i2,0) -= delx*fbond;
+    f(i2,1) -= dely*fbond;
+    f(i2,2) -= delz*fbond;
   }
 
   if (EVFLAG) ev_tally(ev,i1,i2,ebond,fbond,delx,dely,delz);
@@ -196,13 +194,6 @@ template<class DeviceType>
 void BondHarmonicKokkos<DeviceType>::allocate()
 {
   BondHarmonic::allocate();
-
-  int n = atom->nbondtypes;
-  k_k = DAT::tdual_ffloat_1d("BondHarmonic::k",n+1);
-  k_r0 = DAT::tdual_ffloat_1d("BondHarmonic::r0",n+1);
-
-  d_k = k_k.d_view;
-  d_r0 = k_r0.d_view;
 }
 
 /* ----------------------------------------------------------------------
@@ -215,6 +206,12 @@ void BondHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
   BondHarmonic::coeff(narg, arg);
 
   int n = atom->nbondtypes;
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_k("BondHarmonic::k",n+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_r0("BondHarmonic::r0",n+1);
+
+  d_k = k_k.template view<DeviceType>();
+  d_r0 = k_r0.template view<DeviceType>();
+
   for (int i = 1; i <= n; i++) {
     k_k.h_view[i] = k[i];
     k_r0.h_view[i] = r0[i];
@@ -222,6 +219,9 @@ void BondHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
 
   k_k.template modify<LMPHostType>();
   k_r0.template modify<LMPHostType>();
+  k_k.template sync<DeviceType>();
+  k_r0.template sync<DeviceType>();
+
 }
 
 /* ----------------------------------------------------------------------
@@ -238,10 +238,6 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
   E_FLOAT ebondhalf;
   F_FLOAT v[6];
 
-  // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
-
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) ev.evdwl += ebond;
@@ -253,8 +249,8 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
     }
     if (eflag_atom) {
       ebondhalf = 0.5*ebond;
-      if (newton_bond || i < nlocal) v_eatom[i] += ebondhalf;
-      if (newton_bond || j < nlocal) v_eatom[j] += ebondhalf;
+      if (newton_bond || i < nlocal) d_eatom[i] += ebondhalf;
+      if (newton_bond || j < nlocal) d_eatom[j] += ebondhalf;
     }
   }
 
@@ -296,20 +292,20 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
 
     if (vflag_atom) {
       if (newton_bond || i < nlocal) {
-        v_vatom(i,0) += 0.5*v[0];
-        v_vatom(i,1) += 0.5*v[1];
-        v_vatom(i,2) += 0.5*v[2];
-        v_vatom(i,3) += 0.5*v[3];
-        v_vatom(i,4) += 0.5*v[4];
-        v_vatom(i,5) += 0.5*v[5];
+        d_vatom(i,0) += 0.5*v[0];
+        d_vatom(i,1) += 0.5*v[1];
+        d_vatom(i,2) += 0.5*v[2];
+        d_vatom(i,3) += 0.5*v[3];
+        d_vatom(i,4) += 0.5*v[4];
+        d_vatom(i,5) += 0.5*v[5];
       }
       if (newton_bond || j < nlocal) {
-        v_vatom(j,0) += 0.5*v[0];
-        v_vatom(j,1) += 0.5*v[1];
-        v_vatom(j,2) += 0.5*v[2];
-        v_vatom(j,3) += 0.5*v[3];
-        v_vatom(j,4) += 0.5*v[4];
-        v_vatom(j,5) += 0.5*v[5];
+        d_vatom(j,0) += 0.5*v[0];
+        d_vatom(j,1) += 0.5*v[1];
+        d_vatom(j,2) += 0.5*v[2];
+        d_vatom(j,3) += 0.5*v[3];
+        d_vatom(j,4) += 0.5*v[4];
+        d_vatom(j,5) += 0.5*v[5];
       }
     }
   }
@@ -317,7 +313,10 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class BondHarmonicKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class BondHarmonicKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/bond_harmonic_kokkos.h b/src/KOKKOS/bond_harmonic_kokkos.h
index f2d692c9cc24825569c84d85485fb806f4354761..dc42fbcd5c509623f892213efd493dad62055c14 100755
--- a/src/KOKKOS/bond_harmonic_kokkos.h
+++ b/src/KOKKOS/bond_harmonic_kokkos.h
@@ -60,23 +60,21 @@ class BondHarmonicKokkos : public BondHarmonic {
 
   class NeighborKokkos *neighborKK;
 
-  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
-  typename ArrayTypes<DeviceType>::t_f_array f;
-  typename ArrayTypes<DeviceType>::t_int_2d bondlist;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread x;
+  typename Kokkos::View<F_FLOAT*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
+  typename AT::t_int_2d bondlist;
 
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
-  DAT::t_efloat_1d d_eatom;
-  DAT::t_virial_array d_vatom;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;
 
   int nlocal,newton_bond;
   int eflag,vflag;
 
-  DAT::tdual_ffloat_1d k_k;
-  DAT::tdual_ffloat_1d k_r0;
-
-  DAT::t_ffloat_1d d_k;
-  DAT::t_ffloat_1d d_r0;
+  typename AT::t_ffloat_1d d_k;
+  typename AT::t_ffloat_1d d_r0;
 
   virtual void allocate();
 };
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index eec7f91ee2e0f3946d98ae1135644bf9fd21b3b9..9b3064f219c09bf6b0ed8b6f073acaf37b8a46dc 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -105,9 +105,9 @@ void CommKokkos::init()
 
   int check_forward = 0;
   int check_reverse = 0;
-  if (force->pair && !force->pair->execution_space == Device)
+  if (force->pair && (force->pair->execution_space == Host))
     check_forward += force->pair->comm_forward;
-  if (force->pair && !force->pair->execution_space == Device)
+  if (force->pair && (force->pair->execution_space == Host))
     check_reverse += force->pair->comm_reverse;
 
   for (int i = 0; i < modify->nfix; i++) {
@@ -995,3 +995,4 @@ void CommKokkos::grow_swap(int n)
   memory->grow(maxsendlist,n,"comm:maxsendlist");
   for (int i=0;i<maxswap;i++) maxsendlist[i]=size;
 }
+
diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp
index eeb5bdcf003d8ecaafaa1999c3f5894cda51cadf..6a24591d6cbb66842df6cddefeef873f8f78203b 100755
--- a/src/KOKKOS/compute_temp_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_kokkos.cpp
@@ -149,7 +149,10 @@ void ComputeTempKokkos<DeviceType>::operator()(TagComputeTempVector<RMASS>, cons
   }
 }
 
+namespace LAMMPS_NS {
 template class ComputeTempKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class ComputeTempKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp
index 6042a1a75b23b0b6cc1fbc2d579630bf9bb36e2e..3ae37993c100eb6c9b69bbef1223fe5c5ecfe440 100755
--- a/src/KOKKOS/dihedral_charmm_kokkos.cpp
+++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp
@@ -45,8 +45,8 @@ DihedralCharmmKokkos<DeviceType>::DihedralCharmmKokkos(LAMMPS *lmp) : DihedralCh
   datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK;
   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
 
-  k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag");
-  d_warning_flag = k_warning_flag.view<DeviceType>();
+  k_warning_flag = Kokkos::DualView<int,DeviceType>("Dihedral:warning_flag");
+  d_warning_flag = k_warning_flag.template view<DeviceType>();
   h_warning_flag = k_warning_flag.h_view;
 }
 
@@ -80,30 +80,26 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom) {
-    memory->destroy_kokkos(k_eatom,eatom);
-    memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
-    d_eatom = k_eatom.d_view;
+    if(k_eatom.dimension_0()<maxeatom) {
+      memory->destroy_kokkos(k_eatom,eatom);
+      memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
+      d_eatom = k_eatom.d_view;
+      k_eatom_pair = Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType>("dihedral:eatom_pair",maxeatom);
+      d_eatom_pair = k_eatom_pair.d_view; // pair tallies need their own buffer, not k_eatom
+    }
   }
   if (vflag_atom) {
-    memory->destroy_kokkos(k_vatom,vatom);
-    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom");
-    d_vatom = k_vatom.d_view;
+    if(k_vatom.dimension_0()<maxvatom) {
+      memory->destroy_kokkos(k_vatom,vatom);
+      memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom");
+      d_vatom = k_vatom.d_view;
+      k_vatom_pair = Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType>("dihedral:vatom_pair",maxvatom);
+      d_vatom_pair = k_vatom_pair.d_view; // pair tallies need their own buffer, not k_vatom
+    }
   }
 
-  k_eatom_pair = DAT::tdual_efloat_1d("dihedral:eatom_pair",maxeatom);
-  k_vatom_pair = DAT::tdual_virial_array("dihedral:vatom_pair",maxvatom);
 
-  atomKK->sync(execution_space,datamask_read);
-  k_lj14_1.template sync<DeviceType>();
-  k_lj14_2.template sync<DeviceType>();
-  k_lj14_3.template sync<DeviceType>();
-  k_lj14_4.template sync<DeviceType>();
-  k_k.template sync<DeviceType>();
-  k_multiplicity.template sync<DeviceType>();
-  k_shift.template sync<DeviceType>();
-  k_cos_shift.template sync<DeviceType>();
-  k_sin_shift.template sync<DeviceType>();
-  k_weight.template sync<DeviceType>();
+  //atomKK->sync(execution_space,datamask_read);
   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
   else atomKK->modified(execution_space,F_MASK);
 
@@ -423,25 +419,24 @@ template<class DeviceType>
 void DihedralCharmmKokkos<DeviceType>::allocate()
 {
   DihedralCharmm::allocate();
+}
 
-  int n = atom->ntypes;
-  k_lj14_1 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_1",n+1,n+1);
-  k_lj14_2 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_2",n+1,n+1);
-  k_lj14_3 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_3",n+1,n+1);
-  k_lj14_4 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_4",n+1,n+1);
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+------------------------------------------------------------------------- */
 
-  d_lj14_1 = k_lj14_1.d_view;
-  d_lj14_2 = k_lj14_2.d_view;
-  d_lj14_3 = k_lj14_3.d_view;
-  d_lj14_4 = k_lj14_4.d_view;
+template<class DeviceType>
+void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  DihedralCharmm::coeff(narg, arg);
 
   int nd = atom->ndihedraltypes;
-  k_k = DAT::tdual_ffloat_1d("DihedralCharmm::k",nd+1);
-  k_multiplicity = DAT::tdual_ffloat_1d("DihedralCharmm::multiplicity",nd+1);
-  k_shift = DAT::tdual_ffloat_1d("DihedralCharmm::shift",nd+1);
-  k_cos_shift = DAT::tdual_ffloat_1d("DihedralCharmm::cos_shift",nd+1);
-  k_sin_shift = DAT::tdual_ffloat_1d("DihedralCharmm::sin_shift",nd+1);
-  k_weight = DAT::tdual_ffloat_1d("DihedralCharmm::weight",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_k("DihedralCharmm::k",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_multiplicity("DihedralCharmm::multiplicity",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_shift("DihedralCharmm::shift",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_cos_shift("DihedralCharmm::cos_shift",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_sin_shift("DihedralCharmm::sin_shift",nd+1);
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_weight("DihedralCharmm::weight",nd+1);
 
   d_k = k_k.d_view;
   d_multiplicity = k_multiplicity.d_view;
@@ -449,16 +444,6 @@ void DihedralCharmmKokkos<DeviceType>::allocate()
   d_cos_shift = k_cos_shift.d_view;
   d_sin_shift = k_sin_shift.d_view;
   d_weight = k_weight.d_view;
-}
-
-/* ----------------------------------------------------------------------
-   set coeffs for one or more types
-------------------------------------------------------------------------- */
-
-template<class DeviceType>
-void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
-{
-  DihedralCharmm::coeff(narg, arg);
 
   int n = atom->ndihedraltypes;
   for (int i = 1; i <= n; i++) {
@@ -476,6 +461,13 @@ void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
   k_cos_shift.template modify<LMPHostType>();
   k_sin_shift.template modify<LMPHostType>();
   k_weight.template modify<LMPHostType>();
+
+  k_k.template sync<DeviceType>();
+  k_multiplicity.template sync<DeviceType>();
+  k_shift.template sync<DeviceType>();
+  k_cos_shift.template sync<DeviceType>();
+  k_sin_shift.template sync<DeviceType>();
+  k_weight.template sync<DeviceType>();
 }
 
 /* ----------------------------------------------------------------------
@@ -487,6 +479,18 @@ void DihedralCharmmKokkos<DeviceType>::init_style()
 {
   DihedralCharmm::init_style();
 
+  int n = atom->ntypes;
+  Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1);
+  Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1);
+  Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1);
+  Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1);
+
+  d_lj14_1 = k_lj14_1.d_view;
+  d_lj14_2 = k_lj14_2.d_view;
+  d_lj14_3 = k_lj14_3.d_view;
+  d_lj14_4 = k_lj14_4.d_view;
+
+
   if (weightflag) {
     int n = atom->ntypes;
     for (int i = 1; i <= n; i++) {
@@ -503,6 +507,12 @@ void DihedralCharmmKokkos<DeviceType>::init_style()
   k_lj14_2.template modify<LMPHostType>();
   k_lj14_3.template modify<LMPHostType>();
   k_lj14_4.template modify<LMPHostType>();
+
+  k_lj14_1.template sync<DeviceType>();
+  k_lj14_2.template sync<DeviceType>();
+  k_lj14_3.template sync<DeviceType>();
+  k_lj14_4.template sync<DeviceType>();
+
 }
 
 /* ----------------------------------------------------------------------
@@ -524,10 +534,6 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co
   E_FLOAT edihedralquarter;
   F_FLOAT v[6];
 
-  // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
-
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) evm.emol += edihedral;
@@ -541,10 +547,10 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co
     }
     if (eflag_atom) {
       edihedralquarter = 0.25*edihedral;
-      if (newton_bond || i1 < nlocal) v_eatom[i1] += edihedralquarter;
-      if (newton_bond || i2 < nlocal) v_eatom[i2] += edihedralquarter;
-      if (newton_bond || i3 < nlocal) v_eatom[i3] += edihedralquarter;
-      if (newton_bond || i4 < nlocal) v_eatom[i4] += edihedralquarter;
+      if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter;
+      if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter;
+      if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter;
+      if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter;
     }
   }
 
@@ -602,36 +608,36 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co
 
     if (vflag_atom) {
       if (newton_bond || i1 < nlocal) {
-        v_vatom(i1,0) += 0.25*v[0];
-        v_vatom(i1,1) += 0.25*v[1];
-        v_vatom(i1,2) += 0.25*v[2];
-        v_vatom(i1,3) += 0.25*v[3];
-        v_vatom(i1,4) += 0.25*v[4];
-        v_vatom(i1,5) += 0.25*v[5];
+        d_vatom(i1,0) += 0.25*v[0];
+        d_vatom(i1,1) += 0.25*v[1];
+        d_vatom(i1,2) += 0.25*v[2];
+        d_vatom(i1,3) += 0.25*v[3];
+        d_vatom(i1,4) += 0.25*v[4];
+        d_vatom(i1,5) += 0.25*v[5];
       }
       if (newton_bond || i2 < nlocal) {
-        v_vatom(i2,0) += 0.25*v[0];
-        v_vatom(i2,1) += 0.25*v[1];
-        v_vatom(i2,2) += 0.25*v[2];
-        v_vatom(i2,3) += 0.25*v[3];
-        v_vatom(i2,4) += 0.25*v[4];
-        v_vatom(i2,5) += 0.25*v[5];
+        d_vatom(i2,0) += 0.25*v[0];
+        d_vatom(i2,1) += 0.25*v[1];
+        d_vatom(i2,2) += 0.25*v[2];
+        d_vatom(i2,3) += 0.25*v[3];
+        d_vatom(i2,4) += 0.25*v[4];
+        d_vatom(i2,5) += 0.25*v[5];
       }
       if (newton_bond || i3 < nlocal) {
-        v_vatom(i3,0) += 0.25*v[0];
-        v_vatom(i3,1) += 0.25*v[1];
-        v_vatom(i3,2) += 0.25*v[2];
-        v_vatom(i3,3) += 0.25*v[3];
-        v_vatom(i3,4) += 0.25*v[4];
-        v_vatom(i3,5) += 0.25*v[5];
+        d_vatom(i3,0) += 0.25*v[0];
+        d_vatom(i3,1) += 0.25*v[1];
+        d_vatom(i3,2) += 0.25*v[2];
+        d_vatom(i3,3) += 0.25*v[3];
+        d_vatom(i3,4) += 0.25*v[4];
+        d_vatom(i3,5) += 0.25*v[5];
       }
       if (newton_bond || i4 < nlocal) {
-        v_vatom(i4,0) += 0.25*v[0];
-        v_vatom(i4,1) += 0.25*v[1];
-        v_vatom(i4,2) += 0.25*v[2];
-        v_vatom(i4,3) += 0.25*v[3];
-        v_vatom(i4,4) += 0.25*v[4];
-        v_vatom(i4,5) += 0.25*v[5];
+        d_vatom(i4,0) += 0.25*v[0];
+        d_vatom(i4,1) += 0.25*v[1];
+        d_vatom(i4,2) += 0.25*v[2];
+        d_vatom(i4,3) += 0.25*v[3];
+        d_vatom(i4,4) += 0.25*v[4];
+        d_vatom(i4,5) += 0.25*v[5];
       }
     }
   }
@@ -651,9 +657,6 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con
   E_FLOAT evdwlhalf,ecoulhalf,epairhalf;
   F_FLOAT v[6];
 
-  // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom_pair = k_eatom_pair.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom_pair = k_vatom_pair.view<DeviceType>();
 
   if (eflag_either) {
     if (eflag_global) {
@@ -675,8 +678,8 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con
     }
     if (eflag_atom) {
       epairhalf = 0.5 * (evdwl + ecoul);
-      if (newton_bond || i < nlocal) v_eatom_pair[i] += epairhalf;
-      if (newton_bond || j < nlocal) v_eatom_pair[j] += epairhalf;
+      if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf;
+      if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf;
     }
   }
 
@@ -718,20 +721,20 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con
 
     if (vflag_atom) {
       if (newton_bond || i < nlocal) {
-        v_vatom_pair(i,0) += 0.5*v[0];
-        v_vatom_pair(i,1) += 0.5*v[1];
-        v_vatom_pair(i,2) += 0.5*v[2];
-        v_vatom_pair(i,3) += 0.5*v[3];
-        v_vatom_pair(i,4) += 0.5*v[4];
-        v_vatom_pair(i,5) += 0.5*v[5];
+        d_vatom_pair(i,0) += 0.5*v[0];
+        d_vatom_pair(i,1) += 0.5*v[1];
+        d_vatom_pair(i,2) += 0.5*v[2];
+        d_vatom_pair(i,3) += 0.5*v[3];
+        d_vatom_pair(i,4) += 0.5*v[4];
+        d_vatom_pair(i,5) += 0.5*v[5];
       }
       if (newton_bond || j < nlocal) {
-        v_vatom_pair(j,0) += 0.5*v[0];
-        v_vatom_pair(j,1) += 0.5*v[1];
-        v_vatom_pair(j,2) += 0.5*v[2];
-        v_vatom_pair(j,3) += 0.5*v[3];
-        v_vatom_pair(j,4) += 0.5*v[4];
-        v_vatom_pair(j,5) += 0.5*v[5];
+        d_vatom_pair(j,0) += 0.5*v[0];
+        d_vatom_pair(j,1) += 0.5*v[1];
+        d_vatom_pair(j,2) += 0.5*v[2];
+        d_vatom_pair(j,3) += 0.5*v[3];
+        d_vatom_pair(j,4) += 0.5*v[4];
+        d_vatom_pair(j,5) += 0.5*v[5];
       }
     }
   }
@@ -739,7 +742,10 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class DihedralCharmmKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class DihedralCharmmKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/dihedral_charmm_kokkos.h b/src/KOKKOS/dihedral_charmm_kokkos.h
index 2ebae7e98541b0e3fbabfba98c97f93d76b3e438..2ee596353a6a7a6e99d6420f90e1d2f4fa441231 100755
--- a/src/KOKKOS/dihedral_charmm_kokkos.h
+++ b/src/KOKKOS/dihedral_charmm_kokkos.h
@@ -131,47 +131,35 @@ class DihedralCharmmKokkos : public DihedralCharmm {
   typename AT::t_f_array f;
   typename AT::t_int_2d dihedrallist;
 
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
-  DAT::t_efloat_1d d_eatom;
-  DAT::t_virial_array d_vatom;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;
 
-  DAT::tdual_efloat_1d k_eatom_pair;
-  DAT::tdual_virial_array k_vatom_pair;
-  DAT::t_efloat_1d d_eatom_pair;
-  DAT::t_virial_array d_vatom_pair;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom_pair;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom_pair;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom_pair;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom_pair;
 
   int nlocal,newton_bond;
   int eflag,vflag;
   double qqrd2e;
 
-  DAT::tdual_int_scalar k_warning_flag;
-  typename AT::t_int_scalar d_warning_flag;
-  HAT::t_int_scalar h_warning_flag;
-
-  DAT::tdual_ffloat_2d k_lj14_1;
-  DAT::tdual_ffloat_2d k_lj14_2;
-  DAT::tdual_ffloat_2d k_lj14_3;
-  DAT::tdual_ffloat_2d k_lj14_4;
-
-  DAT::t_ffloat_2d d_lj14_1;
-  DAT::t_ffloat_2d d_lj14_2;
-  DAT::t_ffloat_2d d_lj14_3;
-  DAT::t_ffloat_2d d_lj14_4;
-
-  DAT::tdual_ffloat_1d k_k;
-  DAT::tdual_ffloat_1d k_multiplicity;
-  DAT::tdual_ffloat_1d k_shift;
-  DAT::tdual_ffloat_1d k_sin_shift;
-  DAT::tdual_ffloat_1d k_cos_shift;
-  DAT::tdual_ffloat_1d k_weight;
-
-  DAT::t_ffloat_1d d_k;
-  DAT::t_ffloat_1d d_multiplicity;
-  DAT::t_ffloat_1d d_shift;
-  DAT::t_ffloat_1d d_sin_shift;
-  DAT::t_ffloat_1d d_cos_shift;
-  DAT::t_ffloat_1d d_weight;
+  Kokkos::DualView<int,DeviceType> k_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_dev d_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_host h_warning_flag;
+
+  typename AT::t_ffloat_2d d_lj14_1;
+  typename AT::t_ffloat_2d d_lj14_2;
+  typename AT::t_ffloat_2d d_lj14_3;
+  typename AT::t_ffloat_2d d_lj14_4;
+
+  typename AT::t_ffloat_1d d_k;
+  typename AT::t_ffloat_1d d_multiplicity;
+  typename AT::t_ffloat_1d d_shift;
+  typename AT::t_ffloat_1d d_sin_shift;
+  typename AT::t_ffloat_1d d_cos_shift;
+  typename AT::t_ffloat_1d d_weight;
 
   virtual void allocate();
 };
diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp
index 608e8d97545b370b2a014867ac4eb6538e437c81..ff28391ec3c3141cb7d742b149b468733776dc1b 100755
--- a/src/KOKKOS/dihedral_opls_kokkos.cpp
+++ b/src/KOKKOS/dihedral_opls_kokkos.cpp
@@ -515,7 +515,10 @@ void DihedralOPLSKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, const
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class DihedralOPLSKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class DihedralOPLSKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp
index 1c88836afda5021860ef407c0c0ff720f67b32f6..cf65316ec9b34517b74086a550e8fb2e8342aebc 100644
--- a/src/KOKKOS/domain_kokkos.cpp
+++ b/src/KOKKOS/domain_kokkos.cpp
@@ -600,3 +600,4 @@ void DomainKokkos::operator()(TagDomain_x2lamda, const int &i) const {
   x(i,1) = h_inv[1]*delta[1] + h_inv[3]*delta[2];
   x(i,2) = h_inv[2]*delta[2];
 }
+
diff --git a/src/KOKKOS/fix_deform_kokkos.cpp b/src/KOKKOS/fix_deform_kokkos.cpp
index 0050fdc5e43f0a3aab38834643e616c7d912213f..b3316022f1076afe074fa4858729c70484b56bc1 100755
--- a/src/KOKKOS/fix_deform_kokkos.cpp
+++ b/src/KOKKOS/fix_deform_kokkos.cpp
@@ -372,3 +372,4 @@ void FixDeformKokkos::end_of_step()
   if (kspace_flag) force->kspace->setup();
 }
 
+
diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp
index e7dc6f9ba217aa810b63b778ca3a94a58b5f3a6f..de8920b55690d5a437a92982dd270819e0548218 100644
--- a/src/KOKKOS/fix_langevin_kokkos.cpp
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@@ -804,7 +804,10 @@ void FixLangevinKokkos<DeviceType>::cleanup_copy()
   vatom = NULL;
 }
 
+namespace LAMMPS_NS {
 template class FixLangevinKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixLangevinKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index d8225b90b5afb4d2282b7514a9ec4e9011da7388..28b7ff55b6db5a9b146040307d33ed24339a5770 100755
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -732,7 +732,10 @@ void FixNHKokkos<DeviceType>::pre_exchange()
   }
 }
 
+namespace LAMMPS_NS {
 template class FixNHKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixNHKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/fix_nph_kokkos.cpp b/src/KOKKOS/fix_nph_kokkos.cpp
index 08a7ea166d0f8a7cd255893082aabcd54f852e2f..350832a3d70d908f2346336dc230e9a91a3eeee6 100755
--- a/src/KOKKOS/fix_nph_kokkos.cpp
+++ b/src/KOKKOS/fix_nph_kokkos.cpp
@@ -68,7 +68,10 @@ FixNPHKokkos<DeviceType>::FixNPHKokkos(LAMMPS *lmp, int narg, char **arg) :
   this->pflag = 1;
 }
 
+namespace LAMMPS_NS {
 template class FixNPHKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixNPHKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/fix_npt_kokkos.cpp b/src/KOKKOS/fix_npt_kokkos.cpp
index 6ae0add69065d70ce9f9997cd8232cc2a6a3d27c..3832fb3e5771dd796e24bd53fe2e620c14f9232a 100755
--- a/src/KOKKOS/fix_npt_kokkos.cpp
+++ b/src/KOKKOS/fix_npt_kokkos.cpp
@@ -68,7 +68,10 @@ FixNPTKokkos<DeviceType>::FixNPTKokkos(LAMMPS *lmp, int narg, char **arg) :
   this->pflag = 1;
 }
 
+namespace LAMMPS_NS {
 template class FixNPTKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixNPTKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
index aa7698badcafff22b2c8f2e5473c09d5ceef256d..5e7e9b5298943331ee1863ab8eee723778bdb55a 100644
--- a/src/KOKKOS/fix_nve_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -171,7 +171,10 @@ void FixNVEKokkos<DeviceType>::cleanup_copy()
   vatom = NULL;
 }
 
+namespace LAMMPS_NS {
 template class FixNVEKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixNVEKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/fix_nvt_kokkos.cpp b/src/KOKKOS/fix_nvt_kokkos.cpp
index 43fa07ea5a64b5b23f462ae081c1ddc644f5a703..bf6ce0de2b4f6ba3eb1d1cc7461716287d7d46ea 100755
--- a/src/KOKKOS/fix_nvt_kokkos.cpp
+++ b/src/KOKKOS/fix_nvt_kokkos.cpp
@@ -49,7 +49,10 @@ FixNVTKokkos<DeviceType>::FixNVTKokkos(LAMMPS *lmp, int narg, char **arg) :
   this->tflag = 1;
 }
 
+namespace LAMMPS_NS {
 template class FixNVTKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixNVTKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp
index 5162b81b6716785f1a6b055aed739816822a2784..862aab1e21761233b1f2ba004af2f5037cc9d446 100755
--- a/src/KOKKOS/fix_setforce_kokkos.cpp
+++ b/src/KOKKOS/fix_setforce_kokkos.cpp
@@ -179,7 +179,10 @@ void FixSetForceKokkos<DeviceType>::operator()(TagFixSetForceNonConstant, const
   }
 }
 
+namespace LAMMPS_NS {
 template class FixSetForceKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixSetForceKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
index cd6501d50ed3e10c0f4e6db08569c2c92f560fb8..27c0080234261ad1034b99ab5da6f7c764f82d72 100755
--- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
@@ -105,7 +105,10 @@ void FixWallReflectKokkos<DeviceType>::operator()(TagFixWallReflectPostIntegrate
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class FixWallReflectKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixWallReflectKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp
index f614b3dbc2f9925034e880c5d9cc8df25623f103..34d3d437d6178513e97d45aef0b1f106a61ebe48 100755
--- a/src/KOKKOS/improper_harmonic_kokkos.cpp
+++ b/src/KOKKOS/improper_harmonic_kokkos.cpp
@@ -47,8 +47,8 @@ ImproperHarmonicKokkos<DeviceType>::ImproperHarmonicKokkos(LAMMPS *lmp) : Improp
   datamask_read = X_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
 
-  k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag");
-  d_warning_flag = k_warning_flag.view<DeviceType>();
+  k_warning_flag = Kokkos::DualView<int,DeviceType>("Dihedral:warning_flag");
+  d_warning_flag = k_warning_flag.template view<DeviceType>();
   h_warning_flag = k_warning_flag.h_view;
 }
 
@@ -77,17 +77,21 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom) {
-    memory->destroy_kokkos(k_eatom,eatom);
-    memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
-    d_eatom = k_eatom.d_view;
+    if(k_eatom.dimension_0()<maxeatom) {
+      memory->destroy_kokkos(k_eatom,eatom);
+      memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
+      d_eatom = k_eatom.d_view;
+    }
   }
   if (vflag_atom) {
-    memory->destroy_kokkos(k_vatom,vatom);
-    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom");
-    d_vatom = k_vatom.d_view;
+    if(k_vatom.dimension_0()<maxvatom) {
+      memory->destroy_kokkos(k_vatom,vatom);
+      memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom");
+      d_vatom = k_vatom.d_view;
+    }
   }
 
-  atomKK->sync(execution_space,datamask_read);
+  //atomKK->sync(execution_space,datamask_read);
   k_k.template sync<DeviceType>();
   k_chi.template sync<DeviceType>();
   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
@@ -124,7 +128,7 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this);
     }
   }
-  DeviceType::fence();
+  //DeviceType::fence();
 
   // error check
 
@@ -161,9 +165,6 @@ template<int NEWTON_BOND, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void ImproperHarmonicKokkos<DeviceType>::operator()(TagImproperHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
 
-  // The f array is atomic
-  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
-
   const int i1 = improperlist(n,0);
   const int i2 = improperlist(n,1);
   const int i3 = improperlist(n,2);
@@ -262,27 +263,27 @@ void ImproperHarmonicKokkos<DeviceType>::operator()(TagImproperHarmonicCompute<N
   // apply force to each of 4 atoms
 
   if (NEWTON_BOND || i1 < nlocal) {
-    a_f(i1,0) += f1[0];
-    a_f(i1,1) += f1[1];
-    a_f(i1,2) += f1[2];
+    f(i1,0) += f1[0];
+    f(i1,1) += f1[1];
+    f(i1,2) += f1[2];
   }
 
   if (NEWTON_BOND || i2 < nlocal) {
-    a_f(i2,0) += f2[0];
-    a_f(i2,1) += f2[1];
-    a_f(i2,2) += f2[2];
+    f(i2,0) += f2[0];
+    f(i2,1) += f2[1];
+    f(i2,2) += f2[2];
   }
 
   if (NEWTON_BOND || i3 < nlocal) {
-    a_f(i3,0) += f3[0];
-    a_f(i3,1) += f3[1];
-    a_f(i3,2) += f3[2];
+    f(i3,0) += f3[0];
+    f(i3,1) += f3[1];
+    f(i3,2) += f3[2];
   }
 
   if (NEWTON_BOND || i4 < nlocal) {
-    a_f(i4,0) += f4[0];
-    a_f(i4,1) += f4[1];
-    a_f(i4,2) += f4[2];
+    f(i4,0) += f4[0];
+    f(i4,1) += f4[1];
+    f(i4,2) += f4[2];
   }
 
   if (EVFLAG)
@@ -306,8 +307,8 @@ void ImproperHarmonicKokkos<DeviceType>::allocate()
   ImproperHarmonic::allocate();
 
   int n = atom->nimpropertypes;
-  k_k = DAT::tdual_ffloat_1d("ImproperHarmonic::k",n+1);
-  k_chi = DAT::tdual_ffloat_1d("ImproperHarmonic::chi",n+1);
+  k_k = Kokkos::DualView<F_FLOAT*,DeviceType>("ImproperHarmonic::k",n+1);
+  k_chi = Kokkos::DualView<F_FLOAT*,DeviceType>("ImproperHarmonic::chi",n+1);
 
   d_k = k_k.d_view;
   d_chi = k_chi.d_view;
@@ -351,9 +352,6 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co
   E_FLOAT eimproperquarter;
   F_FLOAT v[6];
 
-  // The eatom and vatom arrays are atomic
-  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
-  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
 
   if (eflag_either) {
     if (eflag_global) {
@@ -368,10 +366,10 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co
     }
     if (eflag_atom) {
       eimproperquarter = 0.25*eimproper;
-      if (newton_bond || i1 < nlocal) v_eatom[i1] += eimproperquarter;
-      if (newton_bond || i2 < nlocal) v_eatom[i2] += eimproperquarter;
-      if (newton_bond || i3 < nlocal) v_eatom[i3] += eimproperquarter;
-      if (newton_bond || i4 < nlocal) v_eatom[i4] += eimproperquarter;
+      if (newton_bond || i1 < nlocal) d_eatom[i1] += eimproperquarter;
+      if (newton_bond || i2 < nlocal) d_eatom[i2] += eimproperquarter;
+      if (newton_bond || i3 < nlocal) d_eatom[i3] += eimproperquarter;
+      if (newton_bond || i4 < nlocal) d_eatom[i4] += eimproperquarter;
     }
   }
 
@@ -429,36 +427,36 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co
 
     if (vflag_atom) {
       if (newton_bond || i1 < nlocal) {
-        v_vatom(i1,0) += 0.25*v[0];
-        v_vatom(i1,1) += 0.25*v[1];
-        v_vatom(i1,2) += 0.25*v[2];
-        v_vatom(i1,3) += 0.25*v[3];
-        v_vatom(i1,4) += 0.25*v[4];
-        v_vatom(i1,5) += 0.25*v[5];
+        d_vatom(i1,0) += 0.25*v[0];
+        d_vatom(i1,1) += 0.25*v[1];
+        d_vatom(i1,2) += 0.25*v[2];
+        d_vatom(i1,3) += 0.25*v[3];
+        d_vatom(i1,4) += 0.25*v[4];
+        d_vatom(i1,5) += 0.25*v[5];
       }
       if (newton_bond || i2 < nlocal) {
-        v_vatom(i2,0) += 0.25*v[0];
-        v_vatom(i2,1) += 0.25*v[1];
-        v_vatom(i2,2) += 0.25*v[2];
-        v_vatom(i2,3) += 0.25*v[3];
-        v_vatom(i2,4) += 0.25*v[4];
-        v_vatom(i2,5) += 0.25*v[5];
+        d_vatom(i2,0) += 0.25*v[0];
+        d_vatom(i2,1) += 0.25*v[1];
+        d_vatom(i2,2) += 0.25*v[2];
+        d_vatom(i2,3) += 0.25*v[3];
+        d_vatom(i2,4) += 0.25*v[4];
+        d_vatom(i2,5) += 0.25*v[5];
       }
       if (newton_bond || i3 < nlocal) {
-        v_vatom(i3,0) += 0.25*v[0];
-        v_vatom(i3,1) += 0.25*v[1];
-        v_vatom(i3,2) += 0.25*v[2];
-        v_vatom(i3,3) += 0.25*v[3];
-        v_vatom(i3,4) += 0.25*v[4];
-        v_vatom(i3,5) += 0.25*v[5];
+        d_vatom(i3,0) += 0.25*v[0];
+        d_vatom(i3,1) += 0.25*v[1];
+        d_vatom(i3,2) += 0.25*v[2];
+        d_vatom(i3,3) += 0.25*v[3];
+        d_vatom(i3,4) += 0.25*v[4];
+        d_vatom(i3,5) += 0.25*v[5];
       }
       if (newton_bond || i4 < nlocal) {
-        v_vatom(i4,0) += 0.25*v[0];
-        v_vatom(i4,1) += 0.25*v[1];
-        v_vatom(i4,2) += 0.25*v[2];
-        v_vatom(i4,3) += 0.25*v[3];
-        v_vatom(i4,4) += 0.25*v[4];
-        v_vatom(i4,5) += 0.25*v[5];
+        d_vatom(i4,0) += 0.25*v[0];
+        d_vatom(i4,1) += 0.25*v[1];
+        d_vatom(i4,2) += 0.25*v[2];
+        d_vatom(i4,3) += 0.25*v[3];
+        d_vatom(i4,4) += 0.25*v[4];
+        d_vatom(i4,5) += 0.25*v[5];
       }
     }
   }
@@ -466,7 +464,10 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class ImproperHarmonicKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class ImproperHarmonicKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/improper_harmonic_kokkos.h b/src/KOKKOS/improper_harmonic_kokkos.h
index dc7a12af4d25ccfcb00345b78cc465c41f12d497..c83df68d5e54b91322b3a0f363a39cbaac809f2c 100755
--- a/src/KOKKOS/improper_harmonic_kokkos.h
+++ b/src/KOKKOS/improper_harmonic_kokkos.h
@@ -63,26 +63,26 @@ class ImproperHarmonicKokkos : public ImproperHarmonic {
   class NeighborKokkos *neighborKK;
 
   typename AT::t_x_array_randomread x;
-  typename AT::t_f_array f;
+  typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
   typename AT::t_int_2d improperlist;
 
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
-  DAT::t_efloat_1d d_eatom;
-  DAT::t_virial_array d_vatom;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;
 
   int nlocal,newton_bond;
   int eflag,vflag;
 
-  DAT::tdual_int_scalar k_warning_flag;
-  typename AT::t_int_scalar d_warning_flag;
-  HAT::t_int_scalar h_warning_flag;
+  Kokkos::DualView<int,DeviceType> k_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_dev d_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_host h_warning_flag;
 
-  DAT::tdual_ffloat_1d k_k;
-  DAT::tdual_ffloat_1d k_chi;
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_k;
+  Kokkos::DualView<F_FLOAT*,DeviceType> k_chi;
 
-  DAT::t_ffloat_1d d_k;
-  DAT::t_ffloat_1d d_chi;
+  typename Kokkos::DualView<F_FLOAT*,DeviceType>::t_dev d_k;
+  typename Kokkos::DualView<F_FLOAT*,DeviceType>::t_dev d_chi;
 
   virtual void allocate();
 };
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index c198d97e1429f1a9c5bffee5789e946edaaaa1f9..770598ecd3f0cf885d43965507b2c8ab470fc436 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -274,3 +274,4 @@ int KokkosLMP::neigh_count(int m)
 
   return nneigh;
 }
+
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index c219b6628b0b4aa56a8d1d00b402b73c019318b2..6d8e8226348d13988216b49664ec39e8d6c628e3 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -19,6 +19,11 @@
 #include <impl/Kokkos_Timer.hpp>
 #include <Kokkos_Vectorization.hpp>
 
+#if defined(KOKKOS_HAVE_CXX11)
+#undef ISFINITE
+#define ISFINITE(x) std::isfinite(x)
+#endif
+
 #define MAX_TYPES_STACKPARAMS 12
 #define NeighClusterSize 8
 
@@ -740,7 +745,11 @@ template<class ViewType>
 void memset_kokkos (ViewType &view) {
   static MemsetZeroFunctor<typename ViewType::execution_space> f;
   f.ptr = view.ptr_on_device();
+  #ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
+  Kokkos::parallel_for(view.memory_span()/4, f);
+  #else
   Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f);
+  #endif
   ViewType::execution_space::fence();
 }
 
diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp
index 51903a58f64e89991171908d7e30e566ff462bac..b1b98144a604b6f832a1222667ffcad0790711b4 100644
--- a/src/KOKKOS/modify_kokkos.cpp
+++ b/src/KOKKOS/modify_kokkos.cpp
@@ -583,3 +583,4 @@ int ModifyKokkos::min_reset_ref()
   }
   return itmpall;
 }
+
diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp
index 3c243b9153cb1879e54d988d0c2e0d5adc7a80d9..767def70c2fd4569a95a690a3d4dc13397e59dc0 100755
--- a/src/KOKKOS/neigh_bond_kokkos.cpp
+++ b/src/KOKKOS/neigh_bond_kokkos.cpp
@@ -1290,7 +1290,10 @@ void NeighBondKokkos<DeviceType>::update_domain_variables()
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class NeighBondKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class NeighBondKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp
index 302b6def62398f9ff424dda19ec23c3adc2d5c8b..5fe796f84de9ad0c82d3edbe6e660c8e604c8df7 100644
--- a/src/KOKKOS/neigh_list_kokkos.cpp
+++ b/src/KOKKOS/neigh_list_kokkos.cpp
@@ -112,7 +112,10 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
   }
 }
 
+namespace LAMMPS_NS {
 template class NeighListKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class NeighListKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index 17cd93620105f17989c0d44d1894239db7c86d92..b3ed769d75e177d8c7c52bc1d72c5fd468b0963c 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -598,3 +598,4 @@ void NeighborKokkos::build_topology_kokkos() {
 // include to trigger instantiation of templated functions
 
 #include "neigh_full_kokkos.h"
+
diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
index 51547cea9768053874235e3f9820d6acfbe3d516..4c431bb42761e46b49d3e62dc6a5b8d98046f918 100644
--- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
@@ -141,8 +141,6 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairBuckCoulCutKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -367,7 +365,10 @@ double PairBuckCoulCutKokkos<DeviceType>::init_one(int i, int j)
   return cutone;
 }
 
+namespace LAMMPS_NS {
 template class PairBuckCoulCutKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairBuckCoulCutKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp
index 57108ee6e097a0e318d030fd19ecf5cb11cb787e..a7e6deb43f3860e0f7399bd9f6a66c09edb05c16 100644
--- a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp
@@ -153,8 +153,6 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -511,7 +509,10 @@ double PairBuckCoulLongKokkos<DeviceType>::init_one(int i, int j)
 }
 
 
+namespace LAMMPS_NS {
 template class PairBuckCoulLongKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairBuckCoulLongKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp
index 7d73160392a18383b1775edcc38a9fd8eb051b40..50d65b4b6deea9085bfb51d1af760aafb6fd9e20 100755
--- a/src/KOKKOS/pair_buck_kokkos.cpp
+++ b/src/KOKKOS/pair_buck_kokkos.cpp
@@ -120,7 +120,6 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
   copymode = 1;
   EV_FLOAT ev = pair_compute<PairBuckKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-  DeviceType::fence();
 
   if (eflag_global) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -280,7 +279,10 @@ double PairBuckKokkos<DeviceType>::init_one(int i, int j)
   return cutone;
 }
 
+namespace LAMMPS_NS {
 template class PairBuckKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairBuckKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp
index cfed188c5aa3fc7335feff2bf851e01bb3d08bb9..7b0fbad7e5e671c9789c8d479436c9457335c1b6 100644
--- a/src/KOKKOS/pair_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp
@@ -114,8 +114,6 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) eng_coul += ev.ecoul;
   if (vflag_global) {
     virial[0] += ev.v[0];
@@ -259,7 +257,10 @@ double PairCoulCutKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairCoulCutKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairCoulCutKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp
index ea28e325a5f117f9dbd52d072282783f984de6cb..c4b78b89107f839e3fa38f8feaf255e62c7fb03a 100644
--- a/src/KOKKOS/pair_coul_debye_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp
@@ -123,8 +123,6 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairCoulDebyeKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -307,7 +305,10 @@ double PairCoulDebyeKokkos<DeviceType>::init_one(int i, int j)
   return cutone;
 }
 
+namespace LAMMPS_NS {
 template class PairCoulDebyeKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairCoulDebyeKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp
index f3f1dcad90e54c48dec5976c990fae9a57e734f8..503cdc280d948ff71945bc6f315458d9ec6d3df7 100755
--- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp
@@ -172,7 +172,6 @@ void PairCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) eng_coul += ev.ecoul;
   if (vflag_global) {
@@ -426,7 +425,10 @@ int PairCoulDSFKokkos<DeviceType>::sbmask(const int& j) const {
   return j >> SBBITS & 3;
 }
 
+namespace LAMMPS_NS {
 template class PairCoulDSFKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairCoulDSFKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_coul_long_kokkos.cpp b/src/KOKKOS/pair_coul_long_kokkos.cpp
index 150ed99296ad2ce537b5c75e90aa90fdbf41f226..95b6734e940996acb0ea3f6875b24d790efdd1bc 100644
--- a/src/KOKKOS/pair_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_long_kokkos.cpp
@@ -145,8 +145,6 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -450,7 +448,10 @@ double PairCoulLongKokkos<DeviceType>::init_one(int i, int j)
 }
 
 
+namespace LAMMPS_NS {
 template class PairCoulLongKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairCoulLongKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp
index d11611468f117cfe45d555112945325e47fa53ea..774580c9296645de2bb9592b8d986649db8829b8 100755
--- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp
@@ -173,7 +173,6 @@ void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) eng_coul += ev.ecoul;
   if (vflag_global) {
@@ -428,7 +427,10 @@ int PairCoulWolfKokkos<DeviceType>::sbmask(const int& j) const {
   return j >> SBBITS & 3;
 }
 
+namespace LAMMPS_NS {
 template class PairCoulWolfKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairCoulWolfKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
index 706cf0b52375f7a839fbeff0944e9ef06fa99f1f..88b16d1d19d47d14ff0d302f9fc759a4819914af 100755
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -133,7 +133,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nall),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nlocal),*this);
-  DeviceType::fence();
 
   // loop over neighbors of my atoms
 
@@ -156,7 +155,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,0> >(0,inum),*this);
       }
     }
-    DeviceType::fence();
 
     // communicate and sum densities (on the host)
 
@@ -174,7 +172,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<0> >(0,inum),*this);
-    DeviceType::fence();
 
   } else if (neighflag == FULL) {
 
@@ -184,7 +181,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<0> >(0,inum),*this);
-    DeviceType::fence();
   }
 
   if (eflag) {
@@ -239,7 +235,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -1171,7 +1166,10 @@ void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class PairEAMAlloyKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairEAMAlloyKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
index 6d96d3c33c3f65ef21f17f5dfc99f4b3565c8d9b..83b65e8fdc2281174c43b1a6c2da03c5b7e6c420 100755
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -1180,7 +1180,10 @@ void PairEAMFSKokkos<DeviceType>::file2array_fs()
 
 /* ---------------------------------------------------------------------- */
 
+namespace LAMMPS_NS {
 template class PairEAMFSKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairEAMFSKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp
index b643393f84492bf5db669c5bf6e4fe5fb5729186..37a26145e0773b06a51008a8e8edcd9b1427fde8 100755
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@@ -128,7 +128,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMInitialize>(0,nall),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMInitialize>(0,nlocal),*this);
-  DeviceType::fence();
 
   // loop over neighbors of my atoms
 
@@ -151,7 +150,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelA<HALFTHREAD,0> >(0,inum),*this);
       }
     }
-    DeviceType::fence();
 
     // communicate and sum densities (on the host)
 
@@ -169,7 +167,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelB<0> >(0,inum),*this);
-    DeviceType::fence();
 
   } else if (neighflag == FULL) {
 
@@ -179,7 +176,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelAB<1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelAB<0> >(0,inum),*this);
-    DeviceType::fence();
   }
 
   if (eflag) {
@@ -234,7 +230,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
   }
-  DeviceType::fence();
 
   if (eflag_global) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -342,60 +337,6 @@ void PairEAMKokkos<DeviceType>::file2array()
 template<class DeviceType>
 void PairEAMKokkos<DeviceType>::array2spline()
 {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
   rdr = 1.0/dr;
   rdrho = 1.0/drho;
 
@@ -638,7 +579,6 @@ template<class DeviceType>
 template<int EFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairEAMKokkos<DeviceType>::operator()(TagPairEAMKernelB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
-
   // fp = derivative of embedding energy at each atom
   // phi = embedding energy at each atom
   // if rho > rhomax (e.g. due to close approach of two atoms),
@@ -949,7 +889,10 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
   }
 }
 
+namespace LAMMPS_NS {
 template class PairEAMKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairEAMKokkos<LMPHostType>;
-#endif
\ No newline at end of file
+#endif
+}
+
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp
index 4e97e3ebd219023867cdc801e53672ce8bfddcb9..d438e64e7d923cbb866d611f6e44f8054784ee12 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp
@@ -154,8 +154,6 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -510,7 +508,10 @@ double PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCharmmCoulCharmmImplicitKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCharmmCoulCharmmImplicitKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp
index a31377f40a14f308200ac3eda0455f3bae6dbe77..4e125235f465f7a05a4c368e8299c70a545204ca 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp
@@ -154,8 +154,6 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -511,7 +509,9 @@ double PairLJCharmmCoulCharmmKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCharmmCoulCharmmKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCharmmCoulCharmmKokkos<LMPHostType>;
 #endif
+}
diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
index 224e7619522ead92e655b8913e82f710b778bb40..c749b85f3c83b2420269bb4d90d804d5b0672337 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
@@ -154,8 +154,6 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -518,7 +516,10 @@ double PairLJCharmmCoulLongKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCharmmCoulLongKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCharmmCoulLongKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp
index 9a451a744e6d60b76e14eb0362b81788bca1229e..87cd1cb7e141a50ba52d6036f6626e25eaa0ab43 100644
--- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp
@@ -125,8 +125,6 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairLJClass2CoulCutKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -347,8 +345,10 @@ double PairLJClass2CoulCutKokkos<DeviceType>::init_one(int i, int j)
 }
 
 
-
+namespace LAMMPS_NS {
 template class PairLJClass2CoulCutKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJClass2CoulCutKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp
index a9cb4fbbe111606cb8cd73f09cff7317e26f4bb6..297a764ddabd4004ac95c3e829ece6c78c602b45 100644
--- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp
@@ -139,8 +139,6 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -496,8 +494,10 @@ double PairLJClass2CoulLongKokkos<DeviceType>::init_one(int i, int j)
 }
 
 
-
+namespace LAMMPS_NS {
 template class PairLJClass2CoulLongKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJClass2CoulLongKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_class2_kokkos.cpp b/src/KOKKOS/pair_lj_class2_kokkos.cpp
index 9bc3989fff9052b07f5b034698de52f8fe8695cf..a263e81e0e8a74f10f6bc36907f75c8d1527fba5 100644
--- a/src/KOKKOS/pair_lj_class2_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_class2_kokkos.cpp
@@ -114,7 +114,6 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // loop over neighbors of my atoms
 
   EV_FLOAT ev = pair_compute<PairLJClass2Kokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-  DeviceType::fence();
 
   if (eflag) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -274,8 +273,10 @@ double PairLJClass2Kokkos<DeviceType>::init_one(int i, int j)
 }
 
 
-
+namespace LAMMPS_NS {
 template class PairLJClass2Kokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJClass2Kokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
index c3b9439670b2c6a186b0015915c57f1e5e56ab91..b6071880cfe841d7a7204387cf9ca1fafa0ac953 100644
--- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
@@ -123,8 +123,6 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -339,7 +337,10 @@ double PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCutCoulCutKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutCoulCutKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp
index 4e3680bf3055aba2f57d4c4ac11c88c622112996..1da18f0afeb41af1b9320f69bc5d05bea4333ebd 100644
--- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp
@@ -129,8 +129,6 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairLJCutCoulDebyeKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -369,7 +367,10 @@ double PairLJCutCoulDebyeKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCutCoulDebyeKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutCoulDebyeKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp
index 480f2ad180d22a3dab832434121236fc6b1dfba9..46cb0a96dc1a93f1d411079656dd9e3e32f59bdf 100644
--- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp
@@ -147,8 +147,6 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   ev = pair_compute<PairLJCutCoulDSFKokkos<DeviceType>,void >
     (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -360,7 +358,10 @@ double PairLJCutCoulDSFKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCutCoulDSFKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutCoulDSFKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
index 356fc4f2c5342b069a3bb9cf979d09923dbd77ab..00d1561bc3d3fa1b4fbeb7cb599d655ef0a4339b 100644
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
@@ -137,8 +137,6 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -488,7 +486,10 @@ double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCutCoulLongKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutCoulLongKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp
index 3fd73350edfd35f83aea27ccd8bb6beec21b6a7f..2ad7f2d0143e0873f441b0743e26a623f258e223 100644
--- a/src/KOKKOS/pair_lj_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp
@@ -127,7 +127,6 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // loop over neighbors of my atoms
 
   EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-  DeviceType::fence();
 
   if (eflag_global) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -293,7 +292,10 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJCutKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_expand_kokkos.cpp b/src/KOKKOS/pair_lj_expand_kokkos.cpp
index 29473f14bcbc8e1b4083dba10f587c122baf686e..3e1d185d2fd3c16d3a606a35848b5ad6cd04612c 100644
--- a/src/KOKKOS/pair_lj_expand_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_expand_kokkos.cpp
@@ -115,7 +115,6 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   copymode = 1;
 
   EV_FLOAT ev = pair_compute<PairLJExpandKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-  DeviceType::fence();
 
   if (eflag) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -279,7 +278,10 @@ double PairLJExpandKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJExpandKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJExpandKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
index 7081dd5417e9a371a6c67769cec97fef789d071b..c764af303f5a91c1556deef816af9347dd4edbad 100644
--- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp
@@ -145,8 +145,6 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
       (this,(NeighListKokkos<DeviceType>*)list);
 
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += ev.ecoul;
@@ -495,7 +493,10 @@ double PairLJGromacsCoulGromacsKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJGromacsCoulGromacsKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJGromacsCoulGromacsKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp
index d1699fe6b6474b6bb24af26f19e260d0f1473617..2f144599ac34a8df9b3465554bb7f1f25983977e 100644
--- a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp
@@ -132,8 +132,6 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   EV_FLOAT ev = pair_compute<PairLJGromacsKokkos<DeviceType>,CoulLongTable<0> >
       (this,(NeighListKokkos<DeviceType>*)list);
 
-  DeviceType::fence();
-
   if (eflag) {
     eng_vdwl += ev.evdwl;
     eng_coul += 0.0;
@@ -327,7 +325,10 @@ double PairLJGromacsKokkos<DeviceType>::init_one(int i, int j)
   return cutone;
 }
 
+namespace LAMMPS_NS {
 template class PairLJGromacsKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJGromacsKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.cpp b/src/KOKKOS/pair_lj_sdk_kokkos.cpp
index 6ff92035351c67058fd913463cedd20210d6adf7..74183dff0b8b332e435105e72daca6af5597c07b 100644
--- a/src/KOKKOS/pair_lj_sdk_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_sdk_kokkos.cpp
@@ -113,7 +113,6 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   // loop over neighbors of my atoms
 
   EV_FLOAT ev = pair_compute<PairLJSDKKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-  DeviceType::fence();
 
   if (eflag) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -212,7 +211,6 @@ void PairLJSDKKokkos<DeviceType>::allocate()
   d_cutsq = k_cutsq.template view<DeviceType>();
   k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJSDK::params",n+1,n+1);
   params = k_params.d_view;
-  printf("Allocating: %i\n",n);
 }
 
 /* ----------------------------------------------------------------------
@@ -308,7 +306,10 @@ double PairLJSDKKokkos<DeviceType>::init_one(int i, int j)
 
 
 
+namespace LAMMPS_NS {
 template class PairLJSDKKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairLJSDKKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp
index 8fc188f8f41b02e3be005fc1e08cd36f5b65e411..9864066fc245c26fcf803f648fb43151a25d36eb 100755
--- a/src/KOKKOS/pair_sw_kokkos.cpp
+++ b/src/KOKKOS/pair_sw_kokkos.cpp
@@ -128,28 +128,24 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == FULL) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
 
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
 
@@ -902,7 +898,10 @@ void PairSWKokkos<DeviceType>::ev_tally3_atom(EV_FLOAT &ev, const int &i,
   }
 }
 
+namespace LAMMPS_NS {
 template class PairSWKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairSWKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp
index a7e01649cc9d04436d710574583f2eba86147655..fec6512a331d474935529a0542cdaf0ee33a68e2 100644
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@@ -192,7 +192,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
       else Kokkos::parallel_for(config,f);
     }
   }
-  DeviceType::fence();
 
   if (eflag) eng_vdwl += ev.evdwl;
   if (vflag_global) {
@@ -1375,8 +1374,11 @@ void PairTableKokkos<DeviceType>::cleanup_copy() {
   h_table=NULL; d_table=NULL;
 }
 
+namespace LAMMPS_NS {
 template class PairTableKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairTableKokkos<LMPHostType>;
 #endif
 
+}
+
diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp
index 61119c1585013ff115bf1372f35d22824824ef5e..a87c8563045119e4f7178a01c795bd1d7e89b4ea 100755
--- a/src/KOKKOS/pair_tersoff_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_kokkos.cpp
@@ -211,28 +211,24 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == FULL) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
 
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
 
@@ -1196,7 +1192,10 @@ int PairTersoffKokkos<DeviceType>::sbmask(const int& j) const {
   return j >> SBBITS & 3;
 }
 
+namespace LAMMPS_NS {
 template class PairTersoffKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairTersoffKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
index a6846460bdc038a025383f9b958ae74975e7557e..9e18058a8f5f8d0212a72ea112056dc82e095139 100755
--- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp
@@ -211,28 +211,24 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == FULL) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
 
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
 
@@ -1202,7 +1198,10 @@ int PairTersoffMODKokkos<DeviceType>::sbmask(const int& j) const {
   return j >> SBBITS & 3;
 }
 
+namespace LAMMPS_NS {
 template class PairTersoffMODKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairTersoffMODKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
index 1b1023d946d56fee3e2e0631989bc7803d41b5ba..5f6a95130d1b036a842bc1508213b3b361c4a852 100755
--- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
+++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp
@@ -225,28 +225,24 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == HALFTHREAD) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
   } else if (neighflag == FULL) {
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,1> >(0,inum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,0> >(0,inum),*this);
-    DeviceType::fence();
     ev_all += ev;
 
     if (evflag)
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,1> >(0,ignum),*this,ev);
     else
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,0> >(0,ignum),*this);
-    DeviceType::fence();
     ev_all += ev;
   }
 
@@ -1296,7 +1292,10 @@ int PairTersoffZBLKokkos<DeviceType>::sbmask(const int& j) const {
   return j >> SBBITS & 3;
 }
 
+namespace LAMMPS_NS {
 template class PairTersoffZBLKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairTersoffZBLKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp
index 0de027591265ba69417b835b09c6fb9f30a3bd89..68cc7e49c21651b227b8419feb5ced6f1a45433c 100755
--- a/src/KOKKOS/region_block_kokkos.cpp
+++ b/src/KOKKOS/region_block_kokkos.cpp
@@ -164,7 +164,10 @@ void RegBlockKokkos<DeviceType>::rotate(double &x, double &y, double &z, double
   z = point[2] + c[2] + disp[2];
 }
 
+namespace LAMMPS_NS {
 template class RegBlockKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class RegBlockKokkos<LMPHostType>;
 #endif
+}
+
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 527e10add33956e294b0da3d4befe88f8733f553..d063294023e1830ee0f54a5b63d470b027a3e714 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -39,6 +39,19 @@
 
 using namespace LAMMPS_NS;
 
+template<class ViewA, class ViewB>  // functor: adds columns 0..2 of b into a, one row per call
+struct ForceAdder {
+  ViewA a;  // accumulated into by operator()
+  ViewB b;  // only read by operator()
+  ForceAdder(const ViewA& a_, const ViewB& b_):a(a_),b(b_) {}  // copies both arguments into the members
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {  // i: row index into both a and b
+    a(i,0) += b(i,0);
+    a(i,1) += b(i,1);
+    a(i,2) += b(i,2);
+  }
+};
+
 /* ---------------------------------------------------------------------- */
 
 VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
@@ -278,6 +291,8 @@ void VerletKokkos::run(int n)
   if (atomKK->sortfreq > 0) sortflag = 1;
   else sortflag = 0;
 
+  f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());  // merge buffer created with k_f's current first extent
+
   static double time = 0.0;
   static int count = 0;
   atomKK->sync(Device,ALL_MASK);
@@ -359,54 +374,141 @@ void VerletKokkos::run(int n)
 
     timer->stamp();
 
-    // added for debug
-    //atomKK->k_x.sync<LMPHostType>();
-    //atomKK->k_f.sync<LMPHostType>();
-    //atomKK->k_f.modify<LMPHostType>();
     if (n_pre_force) {
       modify->pre_force(vflag);
       timer->stamp(Timer::MODIFY);
     }
 
+    bool execute_on_host = false;  // set to true below if any enabled force style reports execution_space==Host
+    unsigned int datamask_read_device = 0;  // OR of datamask_read over styles not executing on Host
+    unsigned int datamask_modify_device = 0;  // OR of datamask_modify over ALL enabled styles (both branches below write it)
+    unsigned int datamask_read_host = 0;  // OR of datamask_read over styles executing on Host
+    unsigned int datamask_modify_host = 0;  // NOTE(review): never assigned in the lines visible in this patch
+
+    if ( pair_compute_flag ) {  // the same Host/else pattern is repeated for bond, angle, dihedral, improper and kspace
+      if (force->pair->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->pair->datamask_read;
+        datamask_modify_device |= force->pair->datamask_modify;  // NOTE(review): Host branch ORs into the *device* modify mask; confirm this is intended rather than datamask_modify_host
+      } else {
+        datamask_read_device   |= force->pair->datamask_read;
+        datamask_modify_device |= force->pair->datamask_modify;
+      }
+    }
+    if ( atomKK->molecular && force->bond )  {
+      if (force->bond->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->bond->datamask_read;
+        datamask_modify_device |= force->bond->datamask_modify;
+      } else {
+        datamask_read_device   |= force->bond->datamask_read;
+        datamask_modify_device |= force->bond->datamask_modify;
+      }
+    }
+    if ( atomKK->molecular && force->angle ) {
+      if (force->angle->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->angle->datamask_read;
+        datamask_modify_device |= force->angle->datamask_modify;
+      } else {
+        datamask_read_device   |= force->angle->datamask_read;
+        datamask_modify_device |= force->angle->datamask_modify;
+      }
+    }
+    if ( atomKK->molecular && force->dihedral ) {
+      if (force->dihedral->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->dihedral->datamask_read;
+        datamask_modify_device |= force->dihedral->datamask_modify;
+      } else {
+        datamask_read_device   |= force->dihedral->datamask_read;
+        datamask_modify_device |= force->dihedral->datamask_modify;
+      }
+    }
+    if ( atomKK->molecular && force->improper ) {
+      if (force->improper->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->improper->datamask_read;
+        datamask_modify_device |= force->improper->datamask_modify;
+      } else {
+        datamask_read_device   |= force->improper->datamask_read;
+        datamask_modify_device |= force->improper->datamask_modify;
+      }
+    }
+    if ( kspace_compute_flag ) {
+      if (force->kspace->execution_space==Host) {
+        execute_on_host  = true;
+        datamask_read_host   |= force->kspace->datamask_read;
+        datamask_modify_device |= force->kspace->datamask_modify;
+      } else {
+        datamask_read_device   |= force->kspace->datamask_read;
+        datamask_modify_device |= force->kspace->datamask_modify;
+      }
+    }
+
 
     if (pair_compute_flag) {
       atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
       atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+      atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));  // ~(~m|X) == m & ~X: the read mask with the F/ENERGY/VIRIAL bits cleared, a subset of the mask synced two lines above
+      atomKK->modified(force->pair->execution_space,~(~force->pair->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));  // same bit-clearing applied to the modify mask
+      Kokkos::Impl::Timer ktimer;  // NOTE(review): not referenced again inside this if-block; the ktimer.reset()/seconds() calls are removed by this patch
       force->pair->compute(eflag,vflag);
       timer->stamp(Timer::PAIR);
     }
 
+      if(execute_on_host) {  // runs only when at least one enabled force style executes on Host
+        if(pair_compute_flag && force->pair->datamask_modify!=(F_MASK | ENERGY_MASK | VIRIAL_MASK))  // pair modify mask is not exactly F|ENERGY|VIRIAL
+          Kokkos::fence();
+        atomKK->sync_overlapping_device(Host,~(~datamask_read_host|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));  // mask == datamask_read_host with the F/ENERGY/VIRIAL bits cleared
+        if(pair_compute_flag && force->pair->execution_space!=Host) {
+          Kokkos::deep_copy(LMPHostType(),atomKK->k_f.h_view,0.0);  // fill the host-side f view with 0.0 when pair did not run on Host
+        }
+    }  // closes if(execute_on_host) above, despite the shallower indentation
+
     if (atomKK->molecular) {
       if (force->bond) {
-        atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
-        atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+        atomKK->sync(force->bond->execution_space,~(~force->bond->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
+        atomKK->modified(force->bond->execution_space,~(~force->bond->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
         force->bond->compute(eflag,vflag);
       }
       if (force->angle) {
-        atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
-        atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+        atomKK->sync(force->angle->execution_space,~(~force->angle->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
+        atomKK->modified(force->angle->execution_space,~(~force->angle->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
         force->angle->compute(eflag,vflag);
       }
       if (force->dihedral) {
-        atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
-        atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+        atomKK->sync(force->dihedral->execution_space,~(~force->dihedral->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
+        atomKK->modified(force->dihedral->execution_space,~(~force->dihedral->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
         force->dihedral->compute(eflag,vflag);
       }
       if (force->improper) {
-        atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
-        atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+        atomKK->sync(force->improper->execution_space,~(~force->improper->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
+        atomKK->modified(force->improper->execution_space,~(~force->improper->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
         force->improper->compute(eflag,vflag);
       }
       timer->stamp(Timer::BOND);
     }
 
     if (kspace_compute_flag) {
-      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
-      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+      atomKK->sync(force->kspace->execution_space,~(~force->kspace->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
+      atomKK->modified(force->kspace->execution_space,~(~force->kspace->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
       force->kspace->compute(eflag,vflag);
       timer->stamp(Timer::KSPACE);
     }
 
+    if(execute_on_host && !std::is_same<LMPHostType,LMPDeviceType>::value) {  // skipped entirely when host and device types are the same
+      if(f_merge_copy.dimension_0()<atomKK->k_f.dimension_0()) {  // buffer has fewer rows than k_f: replace it
+        f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0());
+      }
+      f = atomKK->k_f.d_view;  // NOTE(review): member f is set here, but the ForceAdder below is given the views directly
+      Kokkos::deep_copy(LMPHostType(),f_merge_copy,atomKK->k_f.h_view);  // copy the host-side f view into the merge buffer
+      Kokkos::parallel_for(atomKK->k_f.dimension_0(),
+        ForceAdder<DAT::t_f_array,DAT::t_f_array>(atomKK->k_f.d_view,f_merge_copy));  // d_view(i,0..2) += f_merge_copy(i,0..2) for every row i
+      atomKK->k_f.template modify<LMPDeviceType>();  // mark k_f as modified for LMPDeviceType after the add
+    }
+
+
     // reverse communication of forces
 
     if (force->newton) comm->reverse_comm();
@@ -414,15 +516,11 @@ void VerletKokkos::run(int n)
 
     // force modifications, final time integration, diagnostics
 
-    ktimer.reset();
-
     if (n_post_force) modify->post_force(vflag);
     modify->final_integrate();
     if (n_end_of_step) modify->end_of_step();
     timer->stamp(Timer::MODIFY);
 
-    time += ktimer.seconds();
-
     // all output
 
     if (ntimestep == output->next) {
@@ -506,3 +604,5 @@ void VerletKokkos::force_clear()
     }
   }
 }
+
+
diff --git a/src/KOKKOS/verlet_kokkos.h b/src/KOKKOS/verlet_kokkos.h
index 4b64ced5676062904f8eebf675f31c71f1346af5..03a93833245dfcf41d36f069eb4b4839b5c156df 100644
--- a/src/KOKKOS/verlet_kokkos.h
+++ b/src/KOKKOS/verlet_kokkos.h
@@ -21,6 +21,7 @@ IntegrateStyle(verlet/kk,VerletKokkos)
 #define LMP_VERLET_KOKKOS_H
 
 #include "verlet.h"
+#include "kokkos_type.h"
 
 namespace LAMMPS_NS {
 
@@ -32,8 +33,16 @@ class VerletKokkos : public Verlet {
   void setup_minimal(int);
   void run(int);
 
- protected:
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {  // adds row i of f_merge_copy into row i of f (columns 0..2)
+    f(i,0) += f_merge_copy(i,0);  // NOTE(review): the parallel_for visible in this patch dispatches ForceAdder, not this operator; confirm it is still needed
+    f(i,1) += f_merge_copy(i,1);
+    f(i,2) += f_merge_copy(i,2);
+  }
+
 
+ protected:
+  DAT::t_f_array f_merge_copy,f;
 
   void force_clear();
 };
diff --git a/src/lammps.cpp b/src/lammps.cpp
index 94a267db8a35504245ed73927595c5cec5f13246..56829e1246b926b489b082470f1b5f540ee64cef 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -650,7 +650,11 @@ void LAMMPS::create()
 
   if (kokkos) atom = new AtomKokkos(this);
   else atom = new Atom(this);
-  atom->create_avec("atomic",0,NULL,1);
+
+  if (kokkos)  // same flag that selected AtomKokkos over Atom just above
+    atom->create_avec("atomic/kk",0,NULL,1);  // "atomic/kk" style name when kokkos is set
+  else
+    atom->create_avec("atomic",0,NULL,1);  // unchanged arguments from the removed line
 
   group = new Group(this);
   force = new Force(this);    // must be after group, to create temperature