diff --git a/doc/src/Commands_fix.txt b/doc/src/Commands_fix.txt
index 3c17fb70568b898fa76618c9f6ccccbc043e8cc8..58828d5c9edb0da368a86018cf6f1b3e6087453d 100644
--- a/doc/src/Commands_fix.txt
+++ b/doc/src/Commands_fix.txt
@@ -68,11 +68,11 @@ OPT.
 "ffl"_fix_ffl.html,
 "filter/corotate"_fix_filter_corotate.html,
 "flow/gauss"_fix_flow_gauss.html,
-"freeze"_fix_freeze.html,
+"freeze (k)"_fix_freeze.html,
 "gcmc"_fix_gcmc.html,
 "gld"_fix_gld.html,
 "gle"_fix_gle.html,
-"gravity (o)"_fix_gravity.html,
+"gravity (ko)"_fix_gravity.html,
 "grem"_fix_grem.html,
 "halt"_fix_halt.html,
 "heat"_fix_heat.html,
@@ -105,7 +105,7 @@ OPT.
 "nph/asphere (o)"_fix_nph_asphere.html,
 "nph/body"_fix_nph_body.html,
 "nph/eff"_fix_nh_eff.html,
-"nph/sphere (o)"_fix_nph_sphere.html,
+"nph/sphere (ko)"_fix_nph_sphere.html,
 "nphug (o)"_fix_nphug.html,
 "npt (kio)"_fix_nh.html,
 "npt/asphere (o)"_fix_npt_asphere.html,
diff --git a/doc/src/Commands_pair.txt b/doc/src/Commands_pair.txt
index 5715c0123b278b5b34d6374abbc651caa99b3c54..090101d5eac69e7e4f7247dbd55fab6b1524e7bd 100644
--- a/doc/src/Commands_pair.txt
+++ b/doc/src/Commands_pair.txt
@@ -95,7 +95,7 @@ OPT.
 "gayberne (gio)"_pair_gayberne.html,
 "gran/hertz/history (o)"_pair_gran.html,
 "gran/hooke (o)"_pair_gran.html,
-"gran/hooke/history (o)"_pair_gran.html,
+"gran/hooke/history (ko)"_pair_gran.html,
 "gw"_pair_gw.html,
 "gw/zbl"_pair_gw.html,
 "hbond/dreiding/lj (o)"_pair_hbond_dreiding.html,
diff --git a/doc/src/fix_freeze.txt b/doc/src/fix_freeze.txt
index 89f2dd8179f9f6ab02cf054c24f790e9c10e7662..9e085d8b1dc68470b6d991b1ca430f055cb08d83 100644
--- a/doc/src/fix_freeze.txt
+++ b/doc/src/fix_freeze.txt
@@ -7,6 +7,7 @@
 :line
 
 fix freeze command :h3
+fix freeze/kk command :h3
 
 [Syntax:]
 
diff --git a/doc/src/fix_gravity.txt b/doc/src/fix_gravity.txt
index ea596c866867e89f3c4b6b8a598ead8662976e57..c529a04d34068940256e38bda3948cd796fcbe17 100644
--- a/doc/src/fix_gravity.txt
+++ b/doc/src/fix_gravity.txt
@@ -8,6 +8,7 @@
 
 fix gravity command :h3
 fix gravity/omp command :h3
+fix gravity/kk command :h3
 
 [Syntax:]
 
diff --git a/doc/src/fix_nve_sphere.txt b/doc/src/fix_nve_sphere.txt
index 818940a9bd1ebda7b50c44120a209aaa4ef4e270..225359dfa7f03b56e6e0636eca55f70384853e19 100644
--- a/doc/src/fix_nve_sphere.txt
+++ b/doc/src/fix_nve_sphere.txt
@@ -8,6 +8,7 @@
 
 fix nve/sphere command :h3
 fix nve/sphere/omp command :h3
+fix nve/sphere/kk command :h3
 
 [Syntax:]
 
diff --git a/doc/src/pair_gran.txt b/doc/src/pair_gran.txt
index 5beaf30e06edb9cd288ce9294b1b31eac664d110..e322f8da15b54e39bf9abe3702a6a3d77e130202 100644
--- a/doc/src/pair_gran.txt
+++ b/doc/src/pair_gran.txt
@@ -10,6 +10,7 @@ pair_style gran/hooke command :h3
 pair_style gran/hooke/omp command :h3
 pair_style gran/hooke/history command :h3
 pair_style gran/hooke/history/omp command :h3
+pair_style gran/hooke/history/kk command :h3
 pair_style gran/hertz/history command :h3
 pair_style gran/hertz/history/omp command :h3
 
diff --git a/src/GRANULAR/fix_freeze.h b/src/GRANULAR/fix_freeze.h
index 52badc3e1bed7a2aa35c0fdcf61c0673d3292f16..a249226aa7840d335924088919ea765af7996776 100644
--- a/src/GRANULAR/fix_freeze.h
+++ b/src/GRANULAR/fix_freeze.h
@@ -34,7 +34,7 @@ class FixFreeze : public Fix {
   void post_force_respa(int, int, int);
   double compute_vector(int);
 
- private:
+ protected:
   int force_flag;
   double foriginal[3],foriginal_all[3];
 };
diff --git a/src/GRANULAR/fix_pour.cpp b/src/GRANULAR/fix_pour.cpp
index 318c54c99a8844bea5f3f86d92c9ebd67f23abd3..3ffca8db9d77d304121c12894195f1f7013cd826 100644
--- a/src/GRANULAR/fix_pour.cpp
+++ b/src/GRANULAR/fix_pour.cpp
@@ -53,6 +53,8 @@ FixPour::FixPour(LAMMPS *lmp, int narg, char **arg) :
 {
   if (narg < 6) error->all(FLERR,"Illegal fix pour command");
 
+  if (lmp->kokkos) error->all(FLERR,"Cannot yet use fix pour with the KOKKOS package");
+
   time_depend = 1;
 
   if (!atom->radius_flag || !atom->rmass_flag)
@@ -181,6 +183,7 @@ FixPour::FixPour(LAMMPS *lmp, int narg, char **arg) :
   for (ifix = 0; ifix < modify->nfix; ifix++) {
     if (strcmp(modify->fix[ifix]->style,"gravity") == 0) break;
     if (strcmp(modify->fix[ifix]->style,"gravity/omp") == 0) break;
+    if (strstr(modify->fix[ifix]->style,"gravity/kk") != NULL) break;
   }
   if (ifix == modify->nfix)
     error->all(FLERR,"No fix gravity defined for fix pour");
@@ -315,6 +318,7 @@ void FixPour::init()
   for (ifix = 0; ifix < modify->nfix; ifix++) {
     if (strcmp(modify->fix[ifix]->style,"gravity") == 0) break;
     if (strcmp(modify->fix[ifix]->style,"gravity/omp") == 0) break;
+    if (strstr(modify->fix[ifix]->style,"gravity/kk") != NULL) break;
   }
   if (ifix == modify->nfix)
     error->all(FLERR,"No fix gravity defined for fix pour");
diff --git a/src/GRANULAR/fix_pour.h b/src/GRANULAR/fix_pour.h
index c65fa6a18f5b1257661883483cb6e9410eb38954..63d0d39152964677d199312a748a3669cc1e7867 100644
--- a/src/GRANULAR/fix_pour.h
+++ b/src/GRANULAR/fix_pour.h
@@ -89,6 +89,10 @@ Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
+E: Cannot yet use fix pour with the KOKKOS package
+
+This feature is not yet supported.
+
 E: Fix pour requires atom attributes radius, rmass
 
 The atom style defined does not have these attributes.
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index c014510fca2d36d9575c32600f64c27f3a426747..83f75c221dc0c9497407884a47594d5a3866b107 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -63,6 +63,8 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
 
 PairGranHookeHistory::~PairGranHookeHistory()
 {
+  if (copymode) return;
+
   delete [] svector;
   if (fix_history) modify->delete_fix("NEIGH_HISTORY");
 
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 08c7468a4971a0fb1f00d8071f231b30d3e1e2c8..321fa34ad7657cb2eda951b32df114ef427091b9 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -71,6 +71,8 @@ action atom_vec_kokkos.cpp
 action atom_vec_kokkos.h
 action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp
 action atom_vec_molecular_kokkos.h atom_vec_molecular.h
+action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp
+action atom_vec_sphere_kokkos.h atom_vec_sphere.h
 action bond_class2_kokkos.cpp bond_class2.cpp 
 action bond_class2_kokkos.h bond_class2.h
 action bond_fene_kokkos.cpp bond_fene.cpp
@@ -97,8 +99,14 @@ action fix_enforce2d_kokkos.cpp
 action fix_enforce2d_kokkos.h
 action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
 action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h  
+action fix_freeze_kokkos.cpp fix_freeze.cpp
+action fix_freeze_kokkos.h fix_freeze.h
+action fix_gravity_kokkos.cpp
+action fix_gravity_kokkos.h
 action fix_langevin_kokkos.cpp
 action fix_langevin_kokkos.h
+action fix_neigh_history_kokkos.cpp
+action fix_neigh_history_kokkos.h
 action fix_nh_kokkos.cpp
 action fix_nh_kokkos.h
 action fix_nph_kokkos.cpp
@@ -107,6 +115,8 @@ action fix_npt_kokkos.cpp
 action fix_npt_kokkos.h
 action fix_nve_kokkos.cpp
 action fix_nve_kokkos.h
+action fix_nve_sphere_kokkos.cpp
+action fix_nve_sphere_kokkos.h
 action fix_nvt_kokkos.cpp
 action fix_nvt_kokkos.h
 action fix_property_atom_kokkos.cpp
@@ -193,6 +203,8 @@ action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
 action pair_eam_fs_kokkos.h pair_eam_fs.h
 action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp
 action pair_exp6_rx_kokkos.h pair_exp6_rx.h
+action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h
+action pair_gran_hooke_history_kokkos.cpp pair_gran_hooke_history.cpp
 action pair_hybrid_kokkos.cpp
 action pair_hybrid_kokkos.h
 action pair_hybrid_overlay_kokkos.cpp
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
index 24b23c6ccf667f076c5fe6e51157a7fc4a555dc4..30db76e72346540fe7b897010dde370808541412 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@@ -48,6 +48,8 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
   k_count = DAT::tdual_int_1d("atom::k_count",1);
   atomKK = (AtomKokkos *) atom;
   commKK = (CommKokkos *) comm;
+
+  no_comm_vel_flag = 1;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
index 7e1cc200d36bec7d4c9571fb403ca8f4dbd0c242..ce36f590531167a429bd5e68a044eb576617213c 100644
--- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
@@ -26,7 +26,10 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) {}
+AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) {
+  no_comm_vel_flag = 1;
+  no_border_vel_flag = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
index f2c04bec1b6281b3922513628b2a655e44c9baf7..83af437ebaaecb97a9af0ed8dedaee6842734638 100644
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -26,6 +26,9 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
   kokkosable = 1;
   buffer = NULL;
   buffer_size = 0;
+
+  no_comm_vel_flag = 0;
+  no_border_vel_flag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -305,6 +308,277 @@ void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
   }
 }
 
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP>
+struct AtomVecKokkos_PackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_v_array _v;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  X_FLOAT _h_rate[6];
+  const int _deform_vremap;
+
+  AtomVecKokkos_PackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_int_1d &mask,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const typename DAT::tdual_int_2d &list,
+    const int &iswap,
+    const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+    const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc,
+    const double * const h_rate,
+    const int &deform_vremap):
+    _x(x.view<DeviceType>()),
+    _mask(mask.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz),
+    _deform_vremap(deform_vremap)
+  {
+    const size_t elements = 6;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements);
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+    _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2];
+    _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+      _buf(i,3) = _v(j,0);
+      _buf(i,4) = _v(j,1);
+      _buf(i,5) = _v(j,2);
+    } else {
+      if (TRICLINIC == 0) {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+           } else {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+      }
+
+      if (DEFORM_VREMAP == 0) {
+        _buf(i,3) = _v(j,0);
+        _buf(i,4) = _v(j,1);
+        _buf(i,5) = _v(j,2);
+      } else {
+        if (_mask(i) & _deform_vremap) {
+          _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4];
+          _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3];
+          _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2];
+        } else {
+          _buf(i,3) = _v(j,0);
+          _buf(i,4) = _v(j,1);
+          _buf(i,5) = _v(j,2);
+        }
+      }
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecKokkos::pack_comm_vel_kokkos(
+  const int &n,
+  const DAT::tdual_int_2d &list,
+  const int & iswap,
+  const DAT::tdual_xfloat_2d &buf,
+  const int &pbc_flag,
+  const int* const pbc)
+{
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|V_MASK);
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if(domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackCommVel<LMPHostType,0,0,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {
+    sync(Device,X_MASK|V_MASK);
+    if(pbc_flag) {
+      if(deform_vremap) {
+        if(domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if(domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,0,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+  return n*6;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecKokkos_UnpackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_v_array _v;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecKokkos_UnpackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const int& first):
+    _x(x.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _first(first)
+  {
+    const size_t elements = 6;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _v(i+_first,0) = _buf(i,3);
+    _v(i+_first,1) = _buf(i,4);
+    _v(i+_first,2) = _buf(i,5);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|V_MASK);
+    modified(Host,X_MASK|V_MASK);
+    struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    sync(Device,X_MASK|V_MASK);
+    modified(Device,X_MASK|V_MASK);
+    struct AtomVecKokkos_UnpackCommVel<LMPDeviceType> f(atomKK->k_x,atomKK->k_v,buf,first);
+    Kokkos::parallel_for(n,f);
+  }
+}
+
 /* ---------------------------------------------------------------------- */
 
 int AtomVecKokkos::pack_comm(int n, int *list, double *buf,
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index b68fdc9b209dcefa6e8ee36c21053a396bc73396..efe55c47ad00215fc94f5fe8b3c231bd85d5de38 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -60,6 +60,15 @@ class AtomVecKokkos : public AtomVec {
     unpack_comm_kokkos(const int &n, const int &nfirst,
                        const DAT::tdual_xfloat_2d &buf);
 
+  virtual int
+    pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &list,
+                         const int & iswap, const DAT::tdual_xfloat_2d &buf,
+                         const int &pbc_flag, const int pbc[]);
+
+  virtual void
+    unpack_comm_vel_kokkos(const int &n, const int &nfirst,
+                           const DAT::tdual_xfloat_2d &buf);
+
   virtual int
     unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,
                       const int & iswap, const int nfirst);
@@ -82,6 +91,16 @@ class AtomVecKokkos : public AtomVec {
                          const DAT::tdual_xfloat_2d &buf,
                          ExecutionSpace space) = 0;
 
+  virtual int
+    pack_border_vel_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                           DAT::tdual_xfloat_2d buf,int iswap,
+                           int pbc_flag, int *pbc, ExecutionSpace space) { return 0; }
+
+  virtual void
+    unpack_border_vel_kokkos(const int &n, const int &nfirst,
+                             const DAT::tdual_xfloat_2d &buf,
+                             ExecutionSpace space) {}
+
   virtual int
     pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
                          DAT::tdual_int_1d k_sendlist,
@@ -94,6 +113,8 @@ class AtomVecKokkos : public AtomVec {
                            ExecutionSpace space) = 0;
 
 
+  int no_comm_vel_flag,no_border_vel_flag;
+
  protected:
 
   HAT::t_x_array h_x;
diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3dfbc5efdca0f6fe2c9ba87600396971439684d6
--- /dev/null
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
@@ -0,0 +1,2903 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include "atom_vec_sphere_kokkos.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "force.h"
+#include "fix.h"
+#include "fix_adapt.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "memory_kokkos.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+static const double MY_PI  = 3.14159265358979323846; // pi
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  molecular = 0;
+
+  comm_x_only = 1;
+  comm_f_only = 0;
+  size_forward = 3;
+  size_reverse = 6;
+  size_border = 8;
+  size_velocity = 6;
+  size_data_atom = 7;
+  size_data_vel = 7;
+  xcol_data = 5;
+
+  atom->sphere_flag = 1;
+  atom->radius_flag = atom->rmass_flag = atom->omega_flag =
+    atom->torque_flag = 1;
+
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+  atomKK = (AtomKokkos *) atom;
+  commKK = (CommKokkos *) comm;
+
+  no_border_vel_flag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::init()
+{
+  AtomVec::init();
+
+  // set radvary if particle diameters are time-varying due to fix adapt
+
+  radvary = 0;
+  comm_x_only = 1;
+  size_forward = 3;
+
+  for (int i = 0; i < modify->nfix; i++) {
+    if (strcmp(modify->fix[i]->style,"adapt") == 0) {
+      FixAdapt *fix = (FixAdapt *) modify->fix[i];
+      if (fix->diamflag) {
+        radvary = 1;
+        comm_x_only = 0;
+        size_forward = 5;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by a chunk
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;
+  else nmax = n;
+  atom->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+
+  memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+  memoryKK->grow_kokkos(atomKK->k_radius,atomKK->radius,nmax,"atom:radius");
+  memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass");
+  memoryKK->grow_kokkos(atomKK->k_omega,atomKK->omega,nmax,3,"atom:omega");
+  memoryKK->grow_kokkos(atomKK->k_torque,atomKK->torque,nmax,3,"atom:torque");
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+
+  grow_reset();
+  sync(Host,ALL_MASK);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::grow_reset()
+{
+  tag = atomKK->tag;
+  d_tag = atomKK->k_tag.d_view;
+  h_tag = atomKK->k_tag.h_view;
+
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+  radius = atomKK->radius;
+  d_radius = atomKK->k_radius.d_view;
+  h_radius = atomKK->k_radius.h_view;
+  rmass = atomKK->rmass;
+  d_rmass = atomKK->k_rmass.d_view;
+  h_rmass = atomKK->k_rmass.h_view;
+  omega = atomKK->omega;
+  d_omega = atomKK->k_omega.d_view;
+  h_omega = atomKK->k_omega.h_view;
+  torque = atomKK->torque;
+  d_torque = atomKK->k_torque.d_view;
+  h_torque = atomKK->k_torque.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::copy(int i, int j, int delflag)
+{
+  sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+            MASK_MASK | IMAGE_MASK | RADIUS_MASK |
+            RMASS_MASK | OMEGA_MASK);
+
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  h_mask[j] = h_mask[i];
+  h_image[j] = h_image[i];
+  h_x(j,0) = h_x(i,0);
+  h_x(j,1) = h_x(i,1);
+  h_x(j,2) = h_x(i,2);
+  h_v(j,0) = h_v(i,0);
+  h_v(j,1) = h_v(i,1);
+  h_v(j,2) = h_v(i,2);
+
+  h_radius[j] = h_radius[i];
+  h_rmass[j] = h_rmass[i];
+  h_omega(j,0) = h_omega(i,0);
+  h_omega(j,1) = h_omega(i,1);
+  h_omega(j,2) = h_omega(i,2);
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+                MASK_MASK | IMAGE_MASK | RADIUS_MASK |
+                RMASS_MASK | OMEGA_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecSphereKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecSphereKokkos_PackComm(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_float_1d &radius,
+    const typename DAT::tdual_float_1d &rmass,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const typename DAT::tdual_int_2d &list,
+    const int & iswap,
+    const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+    const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+    _x(x.view<DeviceType>()),
+    _radius(radius.view<DeviceType>()),
+    _rmass(rmass.view<DeviceType>()),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz) {
+    const size_t elements = 5;
+    const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements);
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      if (TRICLINIC == 0) {
+	_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+	_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+	_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+      } else {
+	_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+	_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+	_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+      }
+    }
+    _buf(i,3) = _radius(j);
+    _buf(i,4) = _rmass(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm_kokkos(
+  const int &n,
+  const DAT::tdual_int_2d &list,
+  const int & iswap,
+  const DAT::tdual_xfloat_2d &buf,
+  const int &pbc_flag,
+  const int* const pbc)
+{
+  // Fallback to AtomVecKokkos if radvary == 0
+  if (radvary == 0)
+    return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc);
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecSphereKokkos_PackComm<LMPHostType,1,0> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecSphereKokkos_PackComm<LMPHostType,0,0> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {
+    sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,0> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,0> f(
+          atomKK->k_x,
+          atomKK->k_radius,atomKK->k_rmass,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+  return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int RADVARY,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP>
+struct AtomVecSphereKokkos_PackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_v_array _v, _omega;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  X_FLOAT _h_rate[6];
+  const int _deform_vremap;
+
+  AtomVecSphereKokkos_PackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_int_1d &mask,
+    const typename DAT::tdual_float_1d &radius,
+    const typename DAT::tdual_float_1d &rmass,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_v_array &omega,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const typename DAT::tdual_int_2d &list,
+    const int &iswap,
+    const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+    const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc,
+    const double * const h_rate,
+    const int &deform_vremap):
+    _x(x.view<DeviceType>()),
+    _mask(mask.view<DeviceType>()),
+    _radius(radius.view<DeviceType>()),
+    _rmass(rmass.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _omega(omega.view<DeviceType>()),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz),
+    _deform_vremap(deform_vremap)
+  {
+    const size_t elements = 9 + 2 * RADVARY;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements);
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+    _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2];
+    _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      if (TRICLINIC == 0) {
+	_buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+	_buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+	_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+      } else {
+	_buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+	_buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+	_buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+      }
+    }
+    if (DEFORM_VREMAP == 0) {
+      _buf(i,3) = _v(j,0);
+      _buf(i,4) = _v(j,1);
+      _buf(i,5) = _v(j,2);
+    } else {
+      if (_mask(i) & _deform_vremap) {
+        _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4];
+        _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3];
+        _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2];
+      } else {
+        _buf(i,3) = _v(j,0);
+        _buf(i,4) = _v(j,1);
+        _buf(i,5) = _v(j,2);
+      }
+    }
+    _buf(i,6) = _omega(j,0);
+    _buf(i,7) = _omega(j,1);
+    _buf(i,8) = _omega(j,2);
+    if (RADVARY) {
+      _buf(i,9) = _radius(j);
+      _buf(i,10) = _rmass(j);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm_vel_kokkos(
+  const int &n,
+  const DAT::tdual_int_2d &list,
+  const int & iswap,
+  const DAT::tdual_xfloat_2d &buf,
+  const int &pbc_flag,
+  const int* const pbc)
+{
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+    if(pbc_flag) {
+      if(deform_vremap) {
+        if(domain->triclinic) {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,1,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        } else {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,0,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,0,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        }
+      } else {
+        if(domain->triclinic) {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,1,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        } else {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,0,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,0,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        }
+      }
+    } else {
+      if(domain->triclinic) {
+        if (radvary == 0) {
+          struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,0,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if (radvary == 0) {
+          struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,0,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    }
+  } else {
+    sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+    if(pbc_flag) {
+      if(deform_vremap) {
+        if(domain->triclinic) {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,1,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        } else {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,0,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,0,1> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        }
+      } else {
+        if(domain->triclinic) {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,1,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        } else {
+          if (radvary == 0) {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,0,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          } else {
+            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,0,0> f(
+              atomKK->k_x,atomKK->k_mask,
+              atomKK->k_radius,atomKK->k_rmass,
+              atomKK->k_v,atomKK->k_omega,
+              buf,list,iswap,
+              domain->xprd,domain->yprd,domain->zprd,
+              domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+            Kokkos::parallel_for(n,f);
+          }
+        }
+      }
+    } else {
+      if(domain->triclinic) {
+        if (radvary == 0) {
+          struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,0,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if (radvary == 0) {
+          struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,0,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_radius,atomKK->k_rmass,
+            atomKK->k_v,atomKK->k_omega,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    }
+  }
+  return n*(size_forward+size_velocity);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecSphereKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecSphereKokkos_PackCommSelf(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_float_1d &radius,
+    const typename DAT::tdual_float_1d &rmass,
+    const int &nfirst,
+    const typename DAT::tdual_int_2d &list,
+    const int & iswap,
+    const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+    const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+    _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
+    _radius(radius.view<DeviceType>()),
+    _rmass(rmass.view<DeviceType>()),
+    _nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz) {
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _xw(i+_nfirst,0) = _x(j,0);
+      _xw(i+_nfirst,1) = _x(j,1);
+      _xw(i+_nfirst,2) = _x(j,2);
+    } else {
+      if (TRICLINIC == 0) {
+	_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+	_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+	_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+      } else {
+	_xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+	_xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+	_xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+      }
+    }
+    _radius(i+_nfirst) = _radius(j);
+    _rmass(i+_nfirst) = _rmass(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm_self(
+  const int &n, const DAT::tdual_int_2d &list, const int &iswap,
+  const int nfirst, const int &pbc_flag, const int* const pbc) {
+  // Fallback to AtomVecKokkos if radvary == 0
+  if (radvary == 0) 
+    return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+    modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      } else {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,0> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      } else {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,0> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {
+    sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
+    modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      } else {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,0> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      } else {
+	struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,0> f(
+          atomKK->k_x,
+	  atomKK->k_radius,atomKK->k_rmass,
+	  nfirst,list,iswap,
+	  domain->xprd,domain->yprd,domain->zprd,
+	  domain->xy,domain->xz,domain->yz,pbc);
+	Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+  return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSphereKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf;
+  int _first;
+
+  AtomVecSphereKokkos_UnpackComm(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_float_1d &radius,
+    const typename DAT::tdual_float_1d &rmass,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const int& first):
+    _x(x.view<DeviceType>()),
+    _radius(radius.view<DeviceType>()),
+    _rmass(rmass.view<DeviceType>()),
+    _first(first)
+  {
+    const size_t elements = 5;
+    const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.view<DeviceType>().data(),maxsend,elements);
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _radius(i+_first) = _buf(i,3);
+    _rmass(i+_first) = _buf(i,4);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_comm_kokkos(
+  const int &n, const int &first,
+  const DAT::tdual_xfloat_2d &buf ) {
+  // Fallback to AtomVecKokkos if radvary == 0
+  if (radvary == 0) {
+    AtomVecKokkos::unpack_comm_kokkos(n,first,buf);
+    return;
+  }
+  if(commKK->forward_comm_on_host) {
+    modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+    struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
+      atomKK->k_x,
+      atomKK->k_radius,atomKK->k_rmass,
+      buf,first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
+    struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f(
+      atomKK->k_x,
+      atomKK->k_radius,atomKK->k_rmass,
+      buf,first);
+    Kokkos::parallel_for(n,f);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int RADVARY>
+struct AtomVecSphereKokkos_UnpackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_v_array _v, _omega;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecSphereKokkos_UnpackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_float_1d &radius,
+    const typename DAT::tdual_float_1d &rmass,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_v_array &omega,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const int& first):
+    _x(x.view<DeviceType>()),
+    _radius(radius.view<DeviceType>()),
+    _rmass(rmass.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _omega(omega.view<DeviceType>()),
+    _first(first)
+  {
+    const size_t elements = 9 + 2 * RADVARY;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _v(i+_first,0) = _buf(i,3);
+    _v(i+_first,1) = _buf(i,4);
+    _v(i+_first,2) = _buf(i,5);
+    _omega(i+_first,0) = _buf(i,6);
+    _omega(i+_first,1) = _buf(i,7);
+    _omega(i+_first,2) = _buf(i,8);
+    if (RADVARY) {
+      _radius(i+_first) = _buf(i,9);
+      _rmass(i+_first) = _buf(i,10);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
+  const int &n, const int &first,
+  const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {
+    modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+    if (radvary == 0) {
+      struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
+        atomKK->k_x,
+        atomKK->k_radius,atomKK->k_rmass,
+        atomKK->k_v,atomKK->k_omega,
+        buf,first);
+      Kokkos::parallel_for(n,f);
+    } else {
+      struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,1> f(
+        atomKK->k_x,
+        atomKK->k_radius,atomKK->k_rmass,
+        atomKK->k_v,atomKK->k_omega,
+        buf,first);
+      Kokkos::parallel_for(n,f);
+    }
+  } else {
+    modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+    if (radvary == 0) {
+      struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,0> f(
+        atomKK->k_x,
+        atomKK->k_radius,atomKK->k_rmass,
+        atomKK->k_v,atomKK->k_omega,
+        buf,first);
+      Kokkos::parallel_for(n,f);
+    } else {
+      struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,1> f(
+        atomKK->k_x,
+        atomKK->k_radius,atomKK->k_rmass,
+        atomKK->k_v,atomKK->k_omega,
+        buf,first);
+      Kokkos::parallel_for(n,f);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
+			           int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+
+  if (radvary == 0) {
+    // Not sure if we need to call sync for X here
+    sync(Host,X_MASK);
+    m = 0;
+    if (pbc_flag == 0) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0);
+        buf[m++] = h_x(j,1);
+        buf[m++] = h_x(j,2);
+      }
+    } else {
+      if (domain->triclinic == 0) {
+        dx = pbc[0]*domain->xprd;
+        dy = pbc[1]*domain->yprd;
+        dz = pbc[2]*domain->zprd;
+      } else {
+        dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+        dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+        dz = pbc[2]*domain->zprd;
+      }
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+      }
+    }
+  } else {
+    sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+    m = 0;
+    if (pbc_flag == 0) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0);
+        buf[m++] = h_x(j,1);
+        buf[m++] = h_x(j,2);
+        buf[m++] = h_radius[j];
+        buf[m++] = h_rmass[j];
+      }
+    } else {
+      if (domain->triclinic == 0) {
+        dx = pbc[0]*domain->xprd;
+        dy = pbc[1]*domain->yprd;
+        dz = pbc[2]*domain->zprd;
+      } else {
+        dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+        dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+        dz = pbc[2]*domain->zprd;
+      }
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_radius[j];
+        buf[m++] = h_rmass[j];
+      }
+    }
+  }
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
+				       int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  if (radvary == 0) {
+    sync(Host,X_MASK|V_MASK|OMEGA_MASK);
+    m = 0;
+    if (pbc_flag == 0) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0);
+        buf[m++] = h_x(j,1);
+        buf[m++] = h_x(j,2);
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+        buf[m++] = h_omega(j,0);
+        buf[m++] = h_omega(j,1);
+        buf[m++] = h_omega(j,2);
+      }
+    } else {
+      if (domain->triclinic == 0) {
+        dx = pbc[0]*domain->xprd;
+        dy = pbc[1]*domain->yprd;
+        dz = pbc[2]*domain->zprd;
+      } else {
+        dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+        dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+        dz = pbc[2]*domain->zprd;
+      }
+      if (!deform_vremap) {
+        for (i = 0; i < n; i++) {
+          j = list[i];
+          buf[m++] = h_x(j,0) + dx;
+          buf[m++] = h_x(j,1) + dy;
+          buf[m++] = h_x(j,2) + dz;
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+          buf[m++] = h_omega(j,0);
+          buf[m++] = h_omega(j,1);
+          buf[m++] = h_omega(j,2);
+        }
+      } else {
+        dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+        dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+        dvz = pbc[2]*h_rate[2];
+        for (i = 0; i < n; i++) {
+          j = list[i];
+          buf[m++] = h_x(j,0) + dx;
+          buf[m++] = h_x(j,1) + dy;
+          buf[m++] = h_x(j,2) + dz;
+         if (mask[i] & deform_groupbit) {
+            buf[m++] = h_v(j,0) + dvx;
+            buf[m++] = h_v(j,1) + dvy;
+            buf[m++] = h_v(j,2) + dvz;
+          } else {
+            buf[m++] = h_v(j,0);
+            buf[m++] = h_v(j,1);
+            buf[m++] = h_v(j,2);
+          }
+          buf[m++] = h_omega(j,0);
+          buf[m++] = h_omega(j,1);
+          buf[m++] = h_omega(j,2);
+        }
+      }
+    }
+  } else {
+    sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+    m = 0;
+    if (pbc_flag == 0) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0);
+        buf[m++] = h_x(j,1);
+        buf[m++] = h_x(j,2);
+        buf[m++] = h_radius[j];
+        buf[m++] = h_rmass[j];
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+        buf[m++] = h_omega(j,0);
+        buf[m++] = h_omega(j,1);
+        buf[m++] = h_omega(j,2);
+      }
+    } else {
+      if (domain->triclinic == 0) {
+        dx = pbc[0]*domain->xprd;
+        dy = pbc[1]*domain->yprd;
+        dz = pbc[2]*domain->zprd;
+      } else {
+        dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+        dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+        dz = pbc[2]*domain->zprd;
+      }
+      if (!deform_vremap) {
+        for (i = 0; i < n; i++) {
+          j = list[i];
+          buf[m++] = h_x(j,0) + dx;
+          buf[m++] = h_x(j,1) + dy;
+          buf[m++] = h_x(j,2) + dz;
+          buf[m++] = h_radius[j];
+          buf[m++] = h_rmass[j];
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+          buf[m++] = h_omega(j,0);
+          buf[m++] = h_omega(j,1);
+          buf[m++] = h_omega(j,2);
+        }
+      } else {
+        dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+        dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+        dvz = pbc[2]*h_rate[2];
+        for (i = 0; i < n; i++) {
+          j = list[i];
+          buf[m++] = h_x(j,0) + dx;
+          buf[m++] = h_x(j,1) + dy;
+          buf[m++] = h_x(j,2) + dz;
+          buf[m++] = h_radius[j];
+          buf[m++] = h_rmass[j];
+          if (mask[i] & deform_groupbit) {
+            buf[m++] = h_v(j,0) + dvx;
+            buf[m++] = h_v(j,1) + dvy;
+            buf[m++] = h_v(j,2) + dvz;
+          } else {
+            buf[m++] = h_v(j,0);
+            buf[m++] = h_v(j,1);
+            buf[m++] = h_v(j,2);
+          }
+          buf[m++] = h_omega(j,0);
+          buf[m++] = h_omega(j,1);
+          buf[m++] = h_omega(j,2);
+        }
+      }
+    }
+  }
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf)
+{
+  if (radvary == 0) return 0;
+
+  sync(Host,RADIUS_MASK|RMASS_MASK);
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    buf[m++] = h_radius[j];
+    buf[m++] = h_rmass[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
+{
+  if (radvary == 0) {
+    int m = 0;
+    const int last = first + n;
+    for (int i = first; i < last; i++) {
+      h_x(i,0) = buf[m++];
+      h_x(i,1) = buf[m++];
+      h_x(i,2) = buf[m++];
+    }
+    modified(Host,X_MASK);
+  } else {
+    int m = 0;
+    const int last = first + n;
+    for (int i = first; i < last; i++) {
+      h_x(i,0) = buf[m++];
+      h_x(i,1) = buf[m++];
+      h_x(i,2) = buf[m++];
+      h_radius[i] = buf[m++];
+      h_rmass[i] = buf[m++];
+    }
+    modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  if (radvary == 0) {
+    int m = 0;
+    const int last = first + n;
+    for (int i = first; i < last; i++) {
+      h_x(i,0) = buf[m++];
+      h_x(i,1) = buf[m++];
+      h_x(i,2) = buf[m++];
+      h_v(i,0) = buf[m++];
+      h_v(i,1) = buf[m++];
+      h_v(i,2) = buf[m++];
+      h_omega(i,0) = buf[m++];
+      h_omega(i,1) = buf[m++];
+      h_omega(i,2) = buf[m++];
+    }
+    modified(Host,X_MASK|V_MASK|OMEGA_MASK);
+  } else {
+    int m = 0;
+    const int last = first + n;
+    for (int i = first; i < last; i++) {
+      h_x(i,0) = buf[m++];
+      h_x(i,1) = buf[m++];
+      h_x(i,2) = buf[m++];
+      h_radius[i] = buf[m++];
+      h_rmass[i] = buf[m++];
+      h_v(i,0) = buf[m++];
+      h_v(i,1) = buf[m++];
+      h_v(i,2) = buf[m++];
+      h_omega(i,0) = buf[m++];
+      h_omega(i,1) = buf[m++];
+      h_omega(i,2) = buf[m++];
+    }
+    modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
+{
+  if (radvary == 0) return 0;
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    h_radius[i] = buf[m++];
+    h_rmass[i] = buf[m++];
+  }
+  modified(Host,RADIUS_MASK|RMASS_MASK);
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
+{
+  if(n > 0)
+    sync(Host,F_MASK|TORQUE_MASK);
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
+    buf[m++] = h_torque(i,0);
+    buf[m++] = h_torque(i,1);
+    buf[m++] = h_torque(i,2);
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
+{
+  if(n > 0)
+    sync(Host,TORQUE_MASK);
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = h_torque(i,0);
+    buf[m++] = h_torque(i,1);
+    buf[m++] = h_torque(i,2);
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  if(n > 0) {
+    modified(Host,F_MASK|TORQUE_MASK);
+  }
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    h_f(j,0) += buf[m++];
+    h_f(j,1) += buf[m++];
+    h_f(j,2) += buf[m++];
+    h_torque(j,0) += buf[m++];
+    h_torque(j,1) += buf[m++];
+    h_torque(j,2) += buf[m++];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
+{
+  if(n > 0) {
+    modified(Host,TORQUE_MASK);
+  }
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    h_torque(j,0) += buf[m++];
+    h_torque(j,1) += buf[m++];
+    h_torque(j,2) += buf[m++];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecSphereKokkos_PackBorder {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  const typename ArrayTypes<DeviceType>::t_int_1d _type;
+  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecSphereKokkos_PackBorder(
+    const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+    const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
+    const int &iswap,
+    const typename ArrayTypes<DeviceType>::t_x_array &x,
+    const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+    const typename ArrayTypes<DeviceType>::t_int_1d &type,
+    const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+    const typename ArrayTypes<DeviceType>::t_float_1d &radius,
+    const typename ArrayTypes<DeviceType>::t_float_1d &rmass,
+    const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+    _list(list),_iswap(iswap),
+    _x(x),_tag(tag),_type(type),_mask(mask),
+    _radius(radius),
+    _rmass(rmass),
+    _dx(dx),_dy(dy),_dz(dz)
+  {
+    const size_t elements = 8;
+    const int maxsend = (buf.extent(0)*buf.extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.data(),maxsend,elements);
+  }
+  
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    }
+    _buf(i,3) = d_ubuf(_tag(j)).d;
+    _buf(i,4) = d_ubuf(_type(j)).d;
+    _buf(i,5) = d_ubuf(_mask(j)).d;
+    _buf(i,6) = _radius(j);
+    _buf(i,7) = _rmass(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_border_kokkos(
+  int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+  int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
+  sync(space,ALL_MASK);
+
+  if (pbc_flag != 0) {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if(space==Host) {
+      AtomVecSphereKokkos_PackBorder<LMPHostType,1> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,
+        h_radius,h_rmass,
+        dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    } else {
+      AtomVecSphereKokkos_PackBorder<LMPDeviceType,1> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,
+        d_radius,d_rmass,
+        dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    }
+  } else {
+    dx = dy = dz = 0;
+    if(space==Host) {
+      AtomVecSphereKokkos_PackBorder<LMPHostType,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,
+        h_radius,h_rmass,
+        dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    } else {
+      AtomVecSphereKokkos_PackBorder<LMPDeviceType,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,
+        d_radius,d_rmass,
+        dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+    }
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_border(
+  int n, int *list, double *buf,
+  int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+
+  sync(Host,ALL_MASK);
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag[j]).d;
+      buf[m++] = ubuf(h_type[j]).d;
+      buf[m++] = ubuf(h_mask[j]).d;
+      buf[m++] = h_radius[j];
+      buf[m++] = h_rmass[j];
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+      buf[m++] = ubuf(h_tag[j]).d;
+      buf[m++] = ubuf(h_type[j]).d;
+      buf[m++] = ubuf(h_mask[j]).d;
+      buf[m++] = h_radius[j];
+      buf[m++] = h_rmass[j];
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int DEFORM_VREMAP>
+struct AtomVecSphereKokkos_PackBorderVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  const typename ArrayTypes<DeviceType>::t_int_1d _type;
+  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_v_array _v, _omega;
+  X_FLOAT _dx,_dy,_dz, _dvx, _dvy, _dvz;
+  const int _deform_groupbit;
+
+  AtomVecSphereKokkos_PackBorderVel(
+    const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+    const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
+    const int &iswap,
+    const typename ArrayTypes<DeviceType>::t_x_array &x,
+    const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+    const typename ArrayTypes<DeviceType>::t_int_1d &type,
+    const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+    const typename ArrayTypes<DeviceType>::t_float_1d &radius,
+    const typename ArrayTypes<DeviceType>::t_float_1d &rmass,
+    const typename ArrayTypes<DeviceType>::t_v_array &v,
+    const typename ArrayTypes<DeviceType>::t_v_array &omega,
+    const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz,
+    const X_FLOAT &dvx, const X_FLOAT &dvy, const X_FLOAT &dvz,
+    const int &deform_groupbit):
+    _buf(buf),_list(list),_iswap(iswap),
+    _x(x),_tag(tag),_type(type),_mask(mask),
+    _radius(radius),
+    _rmass(rmass),
+    _v(v), _omega(omega),
+    _dx(dx),_dy(dy),_dz(dz),
+    _dvx(dvx),_dvy(dvy),_dvz(dvz),
+    _deform_groupbit(deform_groupbit)
+  {
+    const size_t elements = 14;
+    const int maxsend = (buf.extent(0)*buf.extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.data(),maxsend,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    }
+    _buf(i,3) = d_ubuf(_tag(j)).d;
+    _buf(i,4) = d_ubuf(_type(j)).d;
+    _buf(i,5) = d_ubuf(_mask(j)).d;
+    _buf(i,6) = _radius(j);
+    _buf(i,7) = _rmass(j);
+    if (DEFORM_VREMAP) {
+      if (_mask(i) & _deform_groupbit) {
+        _buf(i,8) = _v(j,0) + _dvx;
+        _buf(i,9) = _v(j,1) + _dvy;
+        _buf(i,10) = _v(j,2) + _dvz;
+      }
+    }
+    else {
+      _buf(i,8) = _v(j,0);
+      _buf(i,9) = _v(j,1);
+      _buf(i,10) = _v(j,2);
+    }
+    _buf(i,11) = _omega(j,0);
+    _buf(i,12) = _omega(j,1);
+    _buf(i,13) = _omega(j,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_border_vel_kokkos(
+  int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+  int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx=0,dy=0,dz=0;
+  X_FLOAT dvx=0,dvy=0,dvz=0;
+
+  // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
+  sync(space,ALL_MASK);
+
+  if (pbc_flag != 0) {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      if(space==Host) {
+        AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f(
+          buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+          iswap,h_x,h_tag,h_type,h_mask,
+          h_radius,h_rmass,
+          h_v, h_omega,
+          dx,dy,dz,dvx,dvy,dvz,
+          deform_groupbit);
+        Kokkos::parallel_for(n,f);
+      } else {
+        AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,0> f(
+          buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+          iswap,d_x,d_tag,d_type,d_mask,
+          d_radius,d_rmass,
+          d_v, d_omega,
+          dx,dy,dz,dvx,dvy,dvz,
+          deform_groupbit);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      if(space==Host) {
+        AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f(
+          buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+          iswap,h_x,h_tag,h_type,h_mask,
+          h_radius,h_rmass,
+          h_v, h_omega,
+          dx,dy,dz,dvx,dvy,dvz,
+          deform_groupbit);
+        Kokkos::parallel_for(n,f);
+      } else {
+        AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,1> f(
+          buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+          iswap,d_x,d_tag,d_type,d_mask,
+          d_radius,d_rmass,
+          d_v, d_omega,
+          dx,dy,dz,dvx,dvy,dvz,
+          deform_groupbit);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {
+    if(space==Host) {
+      AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,
+        h_radius,h_rmass,
+        h_v, h_omega,
+        dx,dy,dz,dvx,dvy,dvz,
+        deform_groupbit);
+      Kokkos::parallel_for(n,f);
+    } else {
+      AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,0,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,
+        d_radius,d_rmass,
+        d_v, d_omega,
+        dx,dy,dz,dvx,dvy,dvz,
+        deform_groupbit);
+      Kokkos::parallel_for(n,f);
+    }
+  }
+
+  return n*(size_border + size_velocity);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
+                                         int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  sync(Host,ALL_MASK);
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag[j]).d;
+      buf[m++] = ubuf(h_type[j]).d;
+      buf[m++] = ubuf(h_mask[j]).d;
+      buf[m++] = h_radius[j];
+      buf[m++] = h_rmass[j];
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+      buf[m++] = h_omega(j,0);
+      buf[m++] = h_omega(j,1);
+      buf[m++] = h_omega(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag[j]).d;
+        buf[m++] = ubuf(h_type[j]).d;
+        buf[m++] = ubuf(h_mask[j]).d;
+        buf[m++] = h_radius[j];
+        buf[m++] = h_rmass[j];
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+        buf[m++] = h_omega(j,0);
+        buf[m++] = h_omega(j,1);
+        buf[m++] = h_omega(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag[j]).d;
+        buf[m++] = ubuf(h_type[j]).d;
+        buf[m++] = ubuf(h_mask[j]).d;
+        buf[m++] = h_radius[j];
+        buf[m++] = h_rmass[j];
+        if (mask[i] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+        buf[m++] = h_omega(j,0);
+        buf[m++] = h_omega(j,1);
+        buf[m++] = h_omega(j,2);
+      }
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  sync(Host,RADIUS_MASK|RMASS_MASK);
+
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    buf[m++] = h_radius[j];
+    buf[m++] = h_rmass[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSphereKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf;
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  typename ArrayTypes<DeviceType>::t_int_1d _type;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  int _first;
+
+  AtomVecSphereKokkos_UnpackBorder(
+    const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+    const typename ArrayTypes<DeviceType>::t_x_array &x,
+    const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+    const typename ArrayTypes<DeviceType>::t_int_1d &type,
+    const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+    const typename ArrayTypes<DeviceType>::t_float_1d &radius,
+    const typename ArrayTypes<DeviceType>::t_float_1d &rmass,
+    const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),
+    _radius(radius),
+    _rmass(rmass),
+    _first(first)
+  {
+    const size_t elements = 8;
+    const int maxsend = (buf.extent(0)*buf.extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.data(),maxsend,elements);
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _tag(i+_first) = static_cast<tagint> (d_ubuf(_buf(i,3)).i);
+    _type(i+_first) = static_cast<int>  (d_ubuf(_buf(i,4)).i);
+    _mask(i+_first) = static_cast<int>  (d_ubuf(_buf(i,5)).i);
+    _radius(i+_first) = _buf(i,6);
+    _rmass(i+_first) = _buf(i,7);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
+					       const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  while (first+n >= nmax) grow(0);
+  if(space==Host) {
+    struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
+      h_x,h_tag,h_type,h_mask,
+      h_radius,h_rmass,
+      first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    struct AtomVecSphereKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(),
+      d_x,d_tag,d_type,d_mask,
+      d_radius,d_rmass,
+      first);
+    Kokkos::parallel_for(n,f);
+  }
+
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
+	         RADIUS_MASK|RMASS_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag[i] = (tagint) ubuf(buf[m++]).i;
+    h_type[i] = (int) ubuf(buf[m++]).i;
+    h_mask[i] = (int) ubuf(buf[m++]).i;
+    h_radius[i] = buf[m++];
+    h_rmass[i] = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+
+  modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSphereKokkos_UnpackBorderVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf;
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  typename ArrayTypes<DeviceType>::t_int_1d _type;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
+  typename ArrayTypes<DeviceType>::t_v_array _v;
+  typename ArrayTypes<DeviceType>::t_v_array _omega;
+  int _first;
+
+  AtomVecSphereKokkos_UnpackBorderVel(
+    const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
+    const typename ArrayTypes<DeviceType>::t_x_array &x,
+    const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+    const typename ArrayTypes<DeviceType>::t_int_1d &type,
+    const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+    const typename ArrayTypes<DeviceType>::t_float_1d &radius,
+    const typename ArrayTypes<DeviceType>::t_float_1d &rmass,
+    const typename ArrayTypes<DeviceType>::t_v_array &v,
+    const typename ArrayTypes<DeviceType>::t_v_array &omega,
+    const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),
+    _radius(radius),
+    _rmass(rmass),
+    _v(v), _omega(omega),
+    _first(first)
+  {
+    const size_t elements = 14;
+    const int maxsend = (buf.extent(0)*buf.extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.data(),maxsend,elements);
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _tag(i+_first) = static_cast<tagint> (d_ubuf(_buf(i,3)).i);
+    _type(i+_first) = static_cast<int>  (d_ubuf(_buf(i,4)).i);
+    _mask(i+_first) = static_cast<int>  (d_ubuf(_buf(i,5)).i);
+    _radius(i+_first) = _buf(i,6);
+    _rmass(i+_first) = _buf(i,7);
+    _v(i+_first,0) = _buf(i,8);
+    _v(i+_first,1) = _buf(i,9);
+    _v(i+_first,2) = _buf(i,10);
+    _omega(i+_first,0) = _buf(i,11);
+    _omega(i+_first,1) = _buf(i,12);
+    _omega(i+_first,2) = _buf(i,13);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_border_vel_kokkos(
+  const int &n, const int &first,
+  const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  while (first+n >= nmax) grow(0);
+  if(space==Host) {
+    struct AtomVecSphereKokkos_UnpackBorderVel<LMPHostType> f(buf.view<LMPHostType>(),
+      h_x,h_tag,h_type,h_mask,
+      h_radius,h_rmass,
+      h_v, h_omega,
+      first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    struct AtomVecSphereKokkos_UnpackBorderVel<LMPDeviceType> f(buf.view<LMPDeviceType>(),
+      d_x,d_tag,d_type,d_mask,
+      d_radius,d_rmass,
+      d_v, d_omega,
+      first);
+    Kokkos::parallel_for(n,f);
+  }
+
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
+	         RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag[i] = (tagint) ubuf(buf[m++]).i;
+    h_type[i] = (int) ubuf(buf[m++]).i;
+    h_mask[i] = (int) ubuf(buf[m++]).i;
+    h_radius[i] = buf[m++];
+    h_rmass[i] = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+    h_omega(i,0) = buf[m++];
+    h_omega(i,1) = buf[m++];
+    h_omega(i,2) = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+
+  modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    h_radius[i] = buf[m++];
+    h_rmass[i] = buf[m++];
+  }
+  modified(Host,RADIUS_MASK|RMASS_MASK);
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSphereKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_float_1d_randomread _radius,_rmass;
+  typename AT::t_v_array_randomread _omega;
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_float_1d _radiusw,_rmassw;
+  typename AT::t_v_array _omegaw;
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecSphereKokkos_PackExchangeFunctor(
+    const AtomKokkos* atom,
+    const typename AT::tdual_xfloat_2d buf,
+    typename AT::tdual_int_1d sendlist,
+    typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _radius(atom->k_radius.view<DeviceType>()),
+    _rmass(atom->k_rmass.view<DeviceType>()),
+    _omega(atom->k_omega.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _radiusw(atom->k_radius.view<DeviceType>()),
+    _rmassw(atom->k_rmass.view<DeviceType>()),
+    _omegaw(atom->k_omega.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi)
+  {
+    const size_t elements = 16;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+
+    _buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    const int i = _sendlist(mysend);
+    _buf(mysend,0) = 16;
+    _buf(mysend,1) = _x(i,0);
+    _buf(mysend,2) = _x(i,1);
+    _buf(mysend,3) = _x(i,2);
+    _buf(mysend,4) = _v(i,0);
+    _buf(mysend,5) = _v(i,1);
+    _buf(mysend,6) = _v(i,2);
+    _buf(mysend,7) = d_ubuf(_tag[i]).d;
+    _buf(mysend,8) = d_ubuf(_type[i]).d;
+    _buf(mysend,9) = d_ubuf(_mask[i]).d;
+    _buf(mysend,10) = d_ubuf(_image[i]).d;
+    _buf(mysend,11) = _radius[i];
+    _buf(mysend,12) = _rmass[i];
+    _buf(mysend,13) = _omega(i,0);
+    _buf(mysend,14) = _omega(i,1);
+    _buf(mysend,15) = _omega(i,2);
+    const int j = _copylist(mysend);
+
+    if (j>-1) {
+      _xw(i,0) = _x(j,0);
+      _xw(i,1) = _x(j,1);
+      _xw(i,2) = _x(j,2);
+      _vw(i,0) = _v(j,0);
+      _vw(i,1) = _v(j,1);
+      _vw(i,2) = _v(j,2);
+      _tagw[i] = _tag(j);
+      _typew[i] = _type(j);
+      _maskw[i] = _mask(j);
+      _imagew[i] = _image(j);
+      _radiusw[i] = _radius(j);
+      _rmassw[i] = _rmass(j);
+      _omegaw(i,0) = _omega(j,0);
+      _omegaw(i,1) = _omega(j,1);
+      _omegaw(i,2) = _omega(j,2);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_exchange_kokkos(
+  const int &nsend,
+  DAT::tdual_xfloat_2d &k_buf,
+  DAT::tdual_int_1d k_sendlist,
+  DAT::tdual_int_1d k_copylist,
+  ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
+{
+  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
+    int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
+  }
+  sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+             MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
+             OMEGA_MASK);
+
+  if(space == Host) {
+    AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+  } else {
+    AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+  }
+  return nsend*16;
+}
+
+/* ----------------------------------------------------------------------
+   pack data for atom I for sending to another proc
+   xyz must be 1st 3 values, so comm::exchange() can test on them
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_exchange(int i, double *buf)
+{
+  sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+            MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
+            OMEGA_MASK);
+
+
+  int m = 1;
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+  buf[m++] = ubuf(h_tag[i]).d;
+  buf[m++] = ubuf(h_type[i]).d;
+  buf[m++] = ubuf(h_mask[i]).d;
+  buf[m++] = ubuf(h_image[i]).d;
+
+  buf[m++] = h_radius[i];
+  buf[m++] = h_rmass[i];
+  buf[m++] = h_omega(i,0);
+  buf[m++] = h_omega(i,1);
+  buf[m++] = h_omega(i,2);
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecSphereKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_float_1d _radius;
+  typename AT::t_float_1d _rmass;
+  typename AT::t_v_array _omega;
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecSphereKokkos_UnpackExchangeFunctor(
+    const AtomKokkos* atom,
+    const typename AT::tdual_xfloat_2d buf,
+    typename AT::tdual_int_1d nlocal,
+    int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _radius(atom->k_radius.view<DeviceType>()),
+    _rmass(atom->k_rmass.view<DeviceType>()),
+    _omega(atom->k_omega.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi)
+  {
+    const size_t elements = 16;
+    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    X_FLOAT x = _buf(myrecv,_dim+1);
+    if (x >= _lo && x < _hi) {
+      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+      _x(i,0) = _buf(myrecv,1);
+      _x(i,1) = _buf(myrecv,2);
+      _x(i,2) = _buf(myrecv,3);
+      _v(i,0) = _buf(myrecv,4);
+      _v(i,1) = _buf(myrecv,5);
+      _v(i,2) = _buf(myrecv,6);
+      _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i;
+      _type[i] = (int) d_ubuf(_buf(myrecv,8)).i;
+      _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i;
+      _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i;
+      _radius[i] = _buf(myrecv,11);
+      _rmass[i] = _buf(myrecv,12);
+      _omega(i,0) = _buf(myrecv,13);
+      _omega(i,1) = _buf(myrecv,14);
+      _omega(i,2) = _buf(myrecv,15);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
+  if(space == Host) {
+    k_count.h_view(0) = nlocal;
+    AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/16,f);
+  } else {
+    k_count.h_view(0) = nlocal;
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/16,f);
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  }
+
+  modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+                 MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
+                 OMEGA_MASK);
+
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_exchange(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+  h_tag[nlocal] = (tagint) ubuf(buf[m++]).i;
+  h_type[nlocal] = (int) ubuf(buf[m++]).i;
+  h_mask[nlocal] = (int) ubuf(buf[m++]).i;
+  h_image[nlocal] = (imageint) ubuf(buf[m++]).i;
+
+  h_radius[nlocal] = buf[m++];
+  h_rmass[nlocal] = buf[m++];
+  h_omega(nlocal,0) = buf[m++];
+  h_omega(nlocal,1) = buf[m++];
+  h_omega(nlocal,2) = buf[m++];
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->
+        unpack_exchange(nlocal,&buf[m]);
+
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK |
+           OMEGA_MASK);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::size_restart()
+{
+  int i;
+
+  int nlocal = atom->nlocal;
+  int n = 16 * nlocal;
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      for (i = 0; i < nlocal; i++)
+        n += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_restart(int i, double *buf)
+{
+  sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
+            MASK_MASK | IMAGE_MASK | V_MASK |
+            RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
+
+  int m = 1;
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = ubuf(h_tag[i]).d;
+  buf[m++] = ubuf(h_type[i]).d;
+  buf[m++] = ubuf(h_mask[i]).d;
+  buf[m++] = ubuf(h_image[i]).d;
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+
+  buf[m++] = h_radius[i];
+  buf[m++] = h_rmass[i];
+  buf[m++] = h_omega(i,0);
+  buf[m++] = h_omega(i,1);
+  buf[m++] = h_omega(i,2);
+
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::unpack_restart(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag[nlocal] = (tagint) ubuf(buf[m++]).i;
+  h_type[nlocal] = (int) ubuf(buf[m++]).i;
+  h_mask[nlocal] = (int) ubuf(buf[m++]).i;
+  h_image[nlocal] = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_radius[nlocal] = buf[m++];
+  h_rmass[nlocal] = buf[m++];
+  h_omega(nlocal,0) = buf[m++];
+  h_omega(nlocal,1) = buf[m++];
+  h_omega(nlocal,2) = buf[m++];
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
+                MASK_MASK | IMAGE_MASK | V_MASK |
+	        RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::create_atom(int itype, double *coord)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+  }
+
+  h_tag[nlocal] = 0;
+  h_type[nlocal] = itype;
+  h_x(nlocal,0) = coord[0];
+  h_x(nlocal,1) = coord[1];
+  h_x(nlocal,2) = coord[2];
+  h_mask[nlocal] = 1;
+  h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+  h_v(nlocal,0) = 0.0;
+  h_v(nlocal,1) = 0.0;
+  h_v(nlocal,2) = 0.0;
+
+  h_radius[nlocal] = 0.5;
+  h_rmass[nlocal] = 4.0*MY_PI/3.0 * h_radius[nlocal]*h_radius[nlocal]*h_radius[nlocal];
+  h_omega(nlocal,0) = 0.0;
+  h_omega(nlocal,1) = 0.0;
+  h_omega(nlocal,2) = 0.0;
+
+  atomKK->modified(Host,ALL_MASK);
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::data_atom(double *coord, imageint imagetmp, char **values)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+
+  tag[nlocal] = ATOTAGINT(values[0]);
+  type[nlocal] = atoi(values[1]);
+  if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  radius[nlocal] = 0.5 * atof(values[2]);
+  if (radius[nlocal] < 0.0)
+    error->one(FLERR,"Invalid radius in Atoms section of data file");
+
+  double density = atof(values[3]);
+  if (density <= 0.0)
+    error->one(FLERR,"Invalid density in Atoms section of data file");
+
+  if (radius[nlocal] == 0.0) rmass[nlocal] = density;
+  else
+    rmass[nlocal] = 4.0*MY_PI/3.0 *
+      radius[nlocal]*radius[nlocal]*radius[nlocal] * density;
+
+  x[nlocal][0] = coord[0];
+  x[nlocal][1] = coord[1];
+  x[nlocal][2] = coord[2];
+
+  image[nlocal] = imagetmp;
+
+  mask[nlocal] = 1;
+  v[nlocal][0] = 0.0;
+  v[nlocal][1] = 0.0;
+  v[nlocal][2] = 0.0;
+  omega[nlocal][0] = 0.0;
+  omega[nlocal][1] = 0.0;
+  omega[nlocal][2] = 0.0;
+
+  atomKK->modified(Host,ALL_MASK);
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  radius[nlocal] = 0.5 * atof(values[0]);
+  if (radius[nlocal] < 0.0)
+    error->one(FLERR,"Invalid radius in Atoms section of data file");
+
+  double density = atof(values[1]);
+  if (density <= 0.0)
+    error->one(FLERR,"Invalid density in Atoms section of data file");
+
+  if (radius[nlocal] == 0.0) rmass[nlocal] = density;
+  else
+    rmass[nlocal] = 4.0*MY_PI/3.0 *
+      radius[nlocal]*radius[nlocal]*radius[nlocal] * density;
+
+
+  atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
+
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Velocities section of data file
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::data_vel(int m, char **values)
+{
+  sync(Host,V_MASK|OMEGA_MASK);
+  h_v(m,0) = atof(values[0]);
+  h_v(m,1) = atof(values[1]);
+  h_v(m,2) = atof(values[2]);
+  h_omega(m,0) = atof(values[3]);
+  h_omega(m,1) = atof(values[4]);
+  h_omega(m,2) = atof(values[5]);
+  modified(Host,V_MASK|OMEGA_MASK);
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Velocities section of data file
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values)
+{
+  sync(Host,OMEGA_MASK);
+  omega[m][0] = atof(values[0]);
+  omega[m][1] = atof(values[1]);
+  omega[m][2] = atof(values[2]);
+  modified(Host,OMEGA_MASK);
+  return 3;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::pack_data(double **buf)
+{
+  atomKK->sync(Host,TAG_MASK|TYPE_MASK|RADIUS_MASK|RMASS_MASK|X_MASK|IMAGE_MASK);
+
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    buf[i][0] = ubuf(h_tag[i]).d;
+    buf[i][1] = ubuf(h_type[i]).d;
+    buf[i][2] = 2.0*h_radius[i];
+    if (h_radius[i] == 0.0) buf[i][3] = h_rmass[i];
+    else
+      buf[i][3] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]);
+    buf[i][4] = h_x(i,0);
+    buf[i][5] = h_x(i,1);
+    buf[i][6] = h_x(i,2);
+    buf[i][7] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d;
+    buf[i][8] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d;
+    buf[i][9] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_data_hybrid(int i, double *buf)
+{
+  atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
+
+  buf[0] = 2.0*h_radius[i];
+  if (h_radius[i] == 0.0) buf[1] = h_rmass[i];
+  else buf[1] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]);
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,TAGINT_FORMAT
+            " %d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i,
+            buf[i][2],buf[i][3],
+            buf[i][4],buf[i][5],buf[i][6],
+            (int) ubuf(buf[i][7]).i,(int) ubuf(buf[i][8]).i,
+            (int) ubuf(buf[i][9]).i);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," %-1.16e %-1.16e",buf[0],buf[1]);
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   pack velocity info for data file
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::pack_vel(double **buf)
+{
+  sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
+
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    buf[i][0] = ubuf(h_tag[i]).d;
+    buf[i][1] = h_v(i,0);
+    buf[i][2] = h_v(i,1);
+    buf[i][3] = h_v(i,2);
+    buf[i][4] = h_omega(i,0);
+    buf[i][5] = h_omega(i,1);
+    buf[i][6] = h_omega(i,2);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid velocity info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf)
+{
+  sync(Host,OMEGA_MASK);
+
+  buf[0] = h_omega(i,0);
+  buf[1] = h_omega(i,1);
+  buf[2] = h_omega(i,2);
+  return 3;
+}
+
+/* ----------------------------------------------------------------------
+   write velocity info to data file
+------------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::write_vel(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++)
+    fprintf(fp,TAGINT_FORMAT
+            " %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e\n",
+            (tagint) ubuf(buf[i][0]).i,buf[i][1],buf[i][2],buf[i][3],
+            buf[i][4],buf[i][5],buf[i][6]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid velocity info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecSphereKokkos::write_vel_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2]);
+  return 3;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecSphereKokkos::memory_usage()
+{
+  bigint bytes = 0;
+
+  if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) bytes += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) bytes += memory->usage(image,nmax);
+  if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) bytes += memory->usage(f,nmax*comm->nthreads,3);
+
+  if (atom->memcheck("radius")) bytes += memory->usage(radius,nmax);
+  if (atom->memcheck("rmass")) bytes += memory->usage(rmass,nmax);
+  if (atom->memcheck("omega")) bytes += memory->usage(omega,nmax,3);
+  if (atom->memcheck("torque"))
+    bytes += memory->usage(torque,nmax*comm->nthreads,3);
+
+  return bytes;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPDeviceType>();
+    if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPDeviceType>();
+    if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPDeviceType>();
+    if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPHostType>();
+    if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPHostType>();
+    if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPHostType>();
+    if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPHostType>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space);
+    if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
+    if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space);
+    if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPDeviceType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space);
+  } else {
+    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
+    if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space);
+    if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space);
+    if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space);
+    if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space);
+    if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
+    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
+    if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space);
+    if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
+    if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space);
+    if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPHostType>())
+      perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecSphereKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPDeviceType>();
+    if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPDeviceType>();
+    if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPDeviceType>();
+    if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPDeviceType>();
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPHostType>();
+    if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPHostType>();
+    if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPHostType>();
+    if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPHostType>();
+  }
+}
diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..28c8a3c8f6ef039b959855ee59306066bb15ea36
--- /dev/null
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.h
@@ -0,0 +1,169 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(sphere/kk,AtomVecSphereKokkos)
+AtomStyle(sphere/kk/device,AtomVecSphereKokkos)
+AtomStyle(sphere/kk/host,AtomVecSphereKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_SPHERE_KOKKOS_H
+#define LMP_ATOM_VEC_SPHERE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecSphereKokkos : public AtomVecKokkos {
+ public:
+  AtomVecSphereKokkos(class LAMMPS *);
+  ~AtomVecSphereKokkos() {}
+  void init();
+  void grow(int);
+  void grow_reset();
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  int pack_comm_hybrid(int, int *, double *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int unpack_comm_hybrid(int, int, double *);
+  int pack_reverse(int, int, double *);
+  int pack_reverse_hybrid(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int unpack_reverse_hybrid(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, imageint, char **);
+  int data_atom_hybrid(int, char **);
+  void data_vel(int, char **);
+  int data_vel_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  void pack_vel(double **);
+  int pack_vel_hybrid(int, double *);
+  void write_vel(FILE *, int, double **);
+  int write_vel_hybrid(FILE *, double *);
+  bigint memory_usage();
+
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                           const int & iswap,
+                           const DAT::tdual_xfloat_2d &buf,
+                           const int &pbc_flag, const int pbc[]);
+  void unpack_comm_vel_kokkos(const int &n, const int &nfirst,
+                              const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_border_vel_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                             DAT::tdual_xfloat_2d buf,int iswap,
+                             int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_vel_kokkos(const int &n, const int &nfirst,
+                                const DAT::tdual_xfloat_2d &buf,
+                                ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
+
+ private:
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+  double *radius,*rmass;
+  double **omega,**torque;
+  int radvary;
+
+  DAT::t_tagint_1d d_tag;
+  HAT::t_tagint_1d h_tag;
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  DAT::t_float_1d d_radius;
+  HAT::t_float_1d h_radius;
+  DAT::t_float_1d d_rmass;
+  HAT::t_float_1d h_rmass;
+  DAT::t_v_array d_omega;
+  HAT::t_v_array h_omega;
+  DAT::t_f_array d_torque;
+  HAT::t_f_array h_torque;
+
+  DAT::tdual_int_1d k_count;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Per-processor system is too big
+
+The number of owned atoms plus ghost atoms on a single
+processor must fit in 32-bit integer.
+
+E: Invalid atom type in Atoms section of data file
+
+Atom types must range from 1 to specified # of types.
+
+E: Invalid radius in Atoms section of data file
+
+Radius must be >= 0.0.
+
+E: Invalid density in Atoms section of data file
+
+Density value cannot be <= 0.0.
+
+*/
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index 21840b7c3ee8fd396b90e9983cd57c2d49ef64aa..f40156aabcec16b85fe396b70e54c08164a3921f 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -134,11 +134,11 @@ void CommKokkos::init()
   if (force->newton == 0) check_reverse = 0;
   if (force->pair) check_reverse += force->pair->comm_reverse_off;
 
-  if (ghost_velocity)
-    forward_comm_classic = true;
-
   if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
     reverse_comm_classic = true;
+
+  if (ghost_velocity && ((AtomVecKokkos*)atom->avec)->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet
+    forward_comm_classic = true;
 }
 
 /* ----------------------------------------------------------------------
@@ -191,10 +191,10 @@ void CommKokkos::forward_comm_device(int dummy)
     if (sendproc[iswap] != me) {
       if (comm_x_only) {
         if (size_forward_recv[iswap]) {
-            buf = atomKK->k_x.view<DeviceType>().data() +
-              firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
-            MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
-                      recvproc[iswap],0,world,&request);
+          buf = atomKK->k_x.view<DeviceType>().data() +
+            firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
+                    recvproc[iswap],0,world,&request);
         }
         n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                    iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
@@ -210,17 +210,21 @@ void CommKokkos::forward_comm_device(int dummy)
                            space,X_MASK);
         }
       } else if (ghost_velocity) {
-        error->all(FLERR,"Ghost velocity forward comm not yet "
-                   "implemented with Kokkos");
-        if (size_forward_recv[iswap])
-          MPI_Irecv(k_buf_recv.view<LMPHostType>().data(),
+        if (size_forward_recv[iswap]) { 
+          MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                     size_forward_recv[iswap],MPI_DOUBLE,
                     recvproc[iswap],0,world,&request);
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
-                                buf_send,pbc_flag[iswap],pbc[iswap]);
-        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
+        }
+        n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                       k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        DeviceType::fence();
+        if (n) {
+          MPI_Send(k_buf_send.view<DeviceType>().data(),n,
+                   MPI_DOUBLE,sendproc[iswap],0,world);
+        }
         if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
-        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
+        avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
+        DeviceType::fence();
       } else {
         if (size_forward_recv[iswap])
           MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
@@ -236,18 +240,18 @@ void CommKokkos::forward_comm_device(int dummy)
         avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
         DeviceType::fence();
       }
-
     } else {
       if (!ghost_velocity) {
         if (sendnum[iswap])
           n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
                                    firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
-      } else if (ghost_velocity) {
-        error->all(FLERR,"Ghost velocity forward comm not yet "
-                   "implemented with Kokkos");
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
-                                buf_send,pbc_flag[iswap],pbc[iswap]);
-        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
+        DeviceType::fence();
+      } else {
+        n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                       k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        DeviceType::fence();
+        avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
+        DeviceType::fence();
       }
     }
   }
@@ -631,10 +635,9 @@ void CommKokkos::exchange_device()
       nsend =
         avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
                                    k_exchange_sendlist,k_exchange_copylist,
-                                   ExecutionSpaceFromDevice<DeviceType>::
-                                   space,dim,lo,hi);
+                                   ExecutionSpaceFromDevice<DeviceType>::space,
+                                   dim,lo,hi);
       DeviceType::fence();
-
     } else {
       while (i < nlocal) {
         if (x[i][dim] < lo || x[i][dim] >= hi) {
@@ -725,7 +728,8 @@ void CommKokkos::borders()
   if (!exchange_comm_classic) {
     static int print = 1;
 
-    if (style != Comm::SINGLE || bordergroup || ghost_velocity) {
+    if (mode != Comm::SINGLE || bordergroup ||
+         (ghost_velocity && ((AtomVecKokkos*)atom->avec)->no_border_vel_flag)) {
       if (print && comm->me==0) {
         error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, "
                       "switching to classic communication");
@@ -833,7 +837,7 @@ void CommKokkos::borders_device() {
       // store sent atom indices in list for use in future timesteps
 
       x = atom->x;
-      if (style == Comm::SINGLE) {
+      if (mode == Comm::SINGLE) {
         lo = slablo[iswap];
         hi = slabhi[iswap];
       } else {
@@ -862,7 +866,7 @@ void CommKokkos::borders_device() {
 
       if (sendflag) {
         if (!bordergroup || ineed >= 2) {
-          if (style == Comm::SINGLE) {
+          if (mode == Comm::SINGLE) {
             k_total_send.h_view() = 0;
             k_total_send.template modify<LMPHostType>();
             k_total_send.template sync<LMPDeviceType>();
@@ -910,7 +914,7 @@ void CommKokkos::borders_device() {
         } else {
           error->all(FLERR,"Required border comm not yet "
                      "implemented with Kokkos");
-          if (style == Comm::SINGLE) {
+          if (mode == Comm::SINGLE) {
             ngroup = atom->nfirst;
             for (i = 0; i < ngroup; i++)
               if (x[i][dim] >= lo && x[i][dim] <= hi) {
@@ -947,10 +951,10 @@ void CommKokkos::borders_device() {
       if (nsend*size_border > maxsend)
         grow_send_kokkos(nsend*size_border,0);
       if (ghost_velocity) {
-        error->all(FLERR,"Required border comm not yet "
-                   "implemented with Kokkos");
-        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
-                                  pbc_flag[iswap],pbc[iswap]);
+        n = avec->
+          pack_border_vel_kokkos(nsend,k_sendlist,k_buf_send,iswap,
+                                 pbc_flag[iswap],pbc[iswap],exec_space);
+        DeviceType::fence();
       }
       else {
         n = avec->
@@ -983,11 +987,16 @@ void CommKokkos::borders_device() {
       // unpack buffer
 
       if (ghost_velocity) {
-        error->all(FLERR,"Required border comm not yet "
-                   "implemented with Kokkos");
-        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
-      }
-      else
+        if (sendproc[iswap] != me) {
+          avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost,
+                                         k_buf_recv,exec_space);
+          DeviceType::fence();
+        } else {
+          avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost,
+                                         k_buf_send,exec_space);
+          DeviceType::fence();
+        }
+      } else {
         if (sendproc[iswap] != me) {
           avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                      k_buf_recv,exec_space);
@@ -997,7 +1006,7 @@ void CommKokkos::borders_device() {
                                      k_buf_send,exec_space);
           DeviceType::fence();
         }
-
+      }
       // set all pointers & counters
 
       smax = MAX(smax,nsend);
@@ -1064,12 +1073,22 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
     else
       k_buf_send.modify<LMPHostType>();
 
-    k_buf_send.resize(maxsend_border,atom->avec->size_border);
+    if (ghost_velocity)
+      k_buf_send.resize(maxsend_border,
+                        atom->avec->size_border + atom->avec->size_velocity);
+    else
+      k_buf_send.resize(maxsend_border,atom->avec->size_border);
     buf_send = k_buf_send.view<LMPHostType>().data();
   }
   else {
-    k_buf_send = DAT::
-      tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
+    if (ghost_velocity)
+      k_buf_send = DAT::
+        tdual_xfloat_2d("comm:k_buf_send",
+                        maxsend_border,
+                        atom->avec->size_border + atom->avec->size_velocity);
+    else
+      k_buf_send = DAT::
+        tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
     buf_send = k_buf_send.view<LMPHostType>().data();
   }
 }
@@ -1115,7 +1134,7 @@ void CommKokkos::grow_swap(int n)
 {
   free_swap();
   allocate_swap(n);
-  if (style == Comm::MULTI) {
+  if (mode == Comm::MULTI) {
     free_multi();
     allocate_multi(n);
   }
diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp
index fec3d89fffa8169e975373c3f20f258af3ee0b47..d94c6e76b51df3d2187d52db23be408c6dc07635 100644
--- a/src/KOKKOS/compute_temp_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_kokkos.cpp
@@ -49,8 +49,10 @@ double ComputeTempKokkos<DeviceType>::compute_scalar()
   invoked_scalar = update->ntimestep;
 
   v = atomKK->k_v.view<DeviceType>();
-  rmass = atomKK->rmass;
-  mass = atomKK->k_mass.view<DeviceType>();
+  if (atomKK->rmass)
+    rmass = atomKK->k_rmass.view<DeviceType>();
+  else
+    mass = atomKK->k_mass.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
   int nlocal = atom->nlocal;
@@ -59,7 +61,7 @@ double ComputeTempKokkos<DeviceType>::compute_scalar()
   CTEMP t_kk;
 
   copymode = 1;
-  if (rmass)
+  if (atomKK->rmass)
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk);
   else
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk);
@@ -102,8 +104,10 @@ void ComputeTempKokkos<DeviceType>::compute_vector()
   invoked_vector = update->ntimestep;
 
   v = atomKK->k_v.view<DeviceType>();
-  rmass = atomKK->rmass;
-  mass = atomKK->k_mass.view<DeviceType>();
+  if (atomKK->rmass)
+    rmass = atomKK->k_rmass.view<DeviceType>();
+  else
+    mass = atomKK->k_mass.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
   int nlocal = atom->nlocal;
@@ -113,7 +117,7 @@ void ComputeTempKokkos<DeviceType>::compute_vector()
   CTEMP t_kk;
 
   copymode = 1;
-  if (rmass)
+  if (atomKK->rmass)
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk);
   else
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk);
diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h
index a01bb42f8c3472d8254cb0ecd0e10bcd68461ceb..7f116f712038a450c1e7c5bad698b266aa824e4d 100644
--- a/src/KOKKOS/compute_temp_kokkos.h
+++ b/src/KOKKOS/compute_temp_kokkos.h
@@ -84,7 +84,7 @@ class ComputeTempKokkos : public ComputeTemp {
 
  protected:
   typename ArrayTypes<DeviceType>::t_v_array_randomread v;
-  double *rmass;
+  typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass;
   typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
   typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
   typename ArrayTypes<DeviceType>::t_int_1d_randomread mask;
diff --git a/src/KOKKOS/fix_freeze_kokkos.cpp b/src/KOKKOS/fix_freeze_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b87ec86f8f5c28ab4afe97a0c2e4d2cb9f0d1507
--- /dev/null
+++ b/src/KOKKOS/fix_freeze_kokkos.cpp
@@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_freeze_kokkos.h"
+#include "atom_masks.h"
+#include "atom_kokkos.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixFreeze(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = F_MASK | MASK_MASK;
+  datamask_modify = F_MASK | TORQUE_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixFreezeKokkos<DeviceType>::setmask()
+{
+  return FixFreeze::setmask();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixFreezeKokkos<DeviceType>::init()
+{
+  FixFreeze::init();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixFreezeKokkos<DeviceType>::setup(int vflag)
+{
+  FixFreeze::setup(vflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixFreezeKokkos<DeviceType>::post_force(int vflag)
+{
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+
+  force_flag = 0;
+  copymode = 1;
+  OriginalForce original;
+  Kokkos::parallel_reduce(nlocal, *this, original);
+  copymode = 0;
+  
+  foriginal[0] = original.values[0];
+  foriginal[1] = original.values[1];
+  foriginal[2] = original.values[2];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int ilevel, int iloop)
+{
+  post_force(vflag);
+}
+
+/* ----------------------------------------------------------------------
+   return components of total force on fix group before force was changed
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double FixFreezeKokkos<DeviceType>::compute_vector(int n)
+{
+  return FixFreeze::compute_vector(n);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const {
+  if (mask[i] & groupbit) {
+    original.values[0] += f(i,0);
+    original.values[1] += f(i,1);
+    original.values[2] += f(i,2);
+    f(i,0) = 0.0;
+    f(i,1) = 0.0;
+    f(i,2) = 0.0;
+    torque(i,0) = 0.0;
+    torque(i,1) = 0.0;
+    torque(i,2) = 0.0;
+  }
+}
+
+namespace LAMMPS_NS {
+template class FixFreezeKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class FixFreezeKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/fix_freeze_kokkos.h b/src/KOKKOS/fix_freeze_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..193e0119958ec44422342a17a9ff8a22855287e0
--- /dev/null
+++ b/src/KOKKOS/fix_freeze_kokkos.h
@@ -0,0 +1,79 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(freeze/kk,FixFreezeKokkos<LMPDeviceType>)
+FixStyle(freeze/kk/device,FixFreezeKokkos<LMPDeviceType>)
+FixStyle(freeze/kk/host,FixFreezeKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_FREEZE_KOKKOS_H
+#define LMP_FIX_FREEZE_KOKKOS_H
+
+#include "fix_freeze.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+  
+template<class DeviceType>
+class FixFreezeKokkos : public FixFreeze {
+ public:
+  struct OriginalForce {
+    double values[3];
+
+    KOKKOS_INLINE_FUNCTION
+    OriginalForce() {
+      values[0] = 0;
+      values[1] = 0;
+      values[2] = 0;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    OriginalForce &operator+=(const OriginalForce &rhs) {
+      values[0] += rhs.values[0];
+      values[1] += rhs.values[1];
+      values[2] += rhs.values[2];
+      return *this;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    void operator+=(const volatile OriginalForce &rhs) volatile {
+      values[0] += rhs.values[0];
+      values[1] += rhs.values[1];
+      values[2] += rhs.values[2];
+    }
+  };
+
+  FixFreezeKokkos(class LAMMPS *, int, char **);
+  int setmask();
+  void init();
+  void setup(int);
+  void post_force(int);
+  void post_force_respa(int, int, int);
+  double compute_vector(int);
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i, OriginalForce &original) const;
+  
+ private:
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_f_array torque;
+  typename ArrayTypes<DeviceType>::t_int_1d mask;
+};
+
+} // namespace LAMMPS_NS
+
+#endif // LMP_FIX_FREEZE_KOKKOS_H
+#endif // FIX_CLASS
diff --git a/src/KOKKOS/fix_gravity_kokkos.cpp b/src/KOKKOS/fix_gravity_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2aff7af56cf74ca970ce9c470379282fbcfd8e41
--- /dev/null
+++ b/src/KOKKOS/fix_gravity_kokkos.cpp
@@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_gravity_kokkos.h"
+#include "atom_masks.h"
+#include "modify.h"
+#include "input.h"
+#include "variable.h"
+#include "update.h"
+#include "atom_kokkos.h"
+#include "atom_vec.h"
+
+using namespace LAMMPS_NS;
+
+enum{CONSTANT,EQUAL};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixGravityKokkos<DeviceType>::FixGravityKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixGravity(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = X_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK;
+  datamask_modify = F_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixGravityKokkos<DeviceType>::post_force(int vflag)
+{
+  // update gravity due to variables
+
+  if (varflag != CONSTANT) {
+    modify->clearstep_compute();
+    if (mstyle == EQUAL) magnitude = input->variable->compute_equal(mvar);
+    if (vstyle == EQUAL) vert = input->variable->compute_equal(vvar);
+    if (pstyle == EQUAL) phi = input->variable->compute_equal(pvar);
+    if (tstyle == EQUAL) theta = input->variable->compute_equal(tvar);
+    if (xstyle == EQUAL) xdir = input->variable->compute_equal(xvar);
+    if (ystyle == EQUAL) ydir = input->variable->compute_equal(yvar);
+    if (zstyle == EQUAL) zdir = input->variable->compute_equal(zvar);
+    modify->addstep_compute(update->ntimestep + 1);
+
+    set_acceleration();
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  if (atomKK->rmass)
+    rmass = atomKK->k_rmass.view<DeviceType>();
+  else
+    mass = atomKK->k_mass.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atomKK->nlocal;
+  if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
+
+  copymode = 1;
+
+  eflag = 0;
+  egrav = 0.0;
+
+  if (atomKK->rmass) {
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixGravityRMass>(0,nlocal), *this, egrav);
+  }
+  else {
+    Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixGravityMass>(0,nlocal), *this, egrav);
+  }
+  copymode = 0;
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixGravityKokkos<DeviceType>::operator()(TagFixGravityRMass, const int i, double &eg) const
+{
+  if (mask[i] & groupbit) {
+    double massone = rmass[i];
+    f(i,0) += massone*xacc;
+    f(i,1) += massone*yacc;
+    f(i,2) += massone*zacc;
+    eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2));
+  }
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixGravityKokkos<DeviceType>::operator()(TagFixGravityMass, const int i, double &eg) const
+{
+  if (mask[i] & groupbit) {
+    double massone = mass[type[i]];
+    f(i,0) += massone*xacc;
+    f(i,1) += massone*yacc;
+    f(i,2) += massone*zacc;
+    eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2));
+  }
+}
+
+namespace LAMMPS_NS {
+template class FixGravityKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class FixGravityKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/fix_gravity_kokkos.h b/src/KOKKOS/fix_gravity_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc487a0e2dd317f4ee69f344ecc26bdb0eb423e5
--- /dev/null
+++ b/src/KOKKOS/fix_gravity_kokkos.h
@@ -0,0 +1,57 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(gravity/kk,FixGravityKokkos<LMPDeviceType>)
+FixStyle(gravity/kk/device,FixGravityKokkos<LMPDeviceType>)
+FixStyle(gravity/kk/host,FixGravityKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_GRAVITY_KOKKOS_H
+#define LMP_FIX_GRAVITY_KOKKOS_H
+
+#include "fix_gravity.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+struct TagFixGravityRMass {};
+struct TagFixGravityMass {};
+
+template<class DeviceType>
+class FixGravityKokkos : public FixGravity {
+  public:
+    FixGravityKokkos(class LAMMPS *, int, char **);
+    virtual ~FixGravityKokkos() {}
+    void post_force(int);
+
+    KOKKOS_INLINE_FUNCTION
+    void operator()(TagFixGravityRMass, const int, double &) const;
+    KOKKOS_INLINE_FUNCTION
+    void operator()(TagFixGravityMass, const int, double &) const;
+
+  private:
+    typename ArrayTypes<DeviceType>::t_x_array x;
+    typename ArrayTypes<DeviceType>::t_f_array f;
+    typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass;
+    typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
+    typename ArrayTypes<DeviceType>::t_int_1d type;
+    typename ArrayTypes<DeviceType>::t_int_1d mask;
+};
+  
+} // namespace LAMMPS_NS
+
+#endif // LMP_FIX_GRAVITY_KOKKOS_H
+#endif // FIX_CLASS
diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp
index 24d505e08d46854b3b72660436070ef1f9688a78..8c403d3325e515c1ba094162af8ad1539d3da04e 100644
--- a/src/KOKKOS/fix_langevin_kokkos.cpp
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@@ -133,7 +133,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
 {
   // sync the device views which might have been modified on host
   atomKK->sync(execution_space,datamask_read);
-  rmass = atomKK->rmass;
+  rmass = atomKK->k_rmass.view<DeviceType>();
   f = atomKK->k_f.template view<DeviceType>();
   v = atomKK->k_v.template view<DeviceType>();
   type = atomKK->k_type.template view<DeviceType>();
@@ -191,7 +191,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
     if (gjfflag)
       if (tallyflag)
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -210,7 +210,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -230,7 +230,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
             }
       else
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -249,7 +249,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -270,7 +270,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
     else
       if (tallyflag)
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -289,7 +289,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -309,7 +309,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
             }
       else
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -328,7 +328,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -350,7 +350,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
     if (gjfflag)
       if (tallyflag)
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -369,7 +369,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -389,7 +389,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
             }
       else
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -408,7 +408,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -429,7 +429,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
     else
       if (tallyflag)
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -448,7 +448,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -468,7 +468,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
             }
       else
         if (tbiasflag == BIAS)
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
@@ -487,7 +487,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag)
               Kokkos::parallel_for(nlocal,post_functor);
             }
         else
-          if (rmass)
+          if (rmass.data())
             if (zeroflag) {
               FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this);
               Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h
index a9d015f6a1a669c76472a8dc300b509ade85b9e7..140fea81d69456c4d1b21d40a1b27c24c257e3fd 100644
--- a/src/KOKKOS/fix_langevin_kokkos.h
+++ b/src/KOKKOS/fix_langevin_kokkos.h
@@ -93,7 +93,7 @@ namespace LAMMPS_NS {
   private:
     class CommKokkos *commKK;
 
-    double *rmass;
+    typename ArrayTypes<DeviceType>::t_float_1d rmass;
     typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev;
     typename ArrayTypes<DeviceType>::t_double_2d d_franprev;
     HAT::t_double_2d h_franprev;
diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d481c20818f1362ae0894b1fb00d2b014699d5cb
--- /dev/null
+++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp
@@ -0,0 +1,335 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_neigh_history_kokkos.h"
+#include "atom_kokkos.h"
+#include "error.h"
+#include "memory_kokkos.h"
+#include "neigh_list_kokkos.h"
+#include "pair_kokkos.h"
+#include "comm.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixNeighHistory(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  memory->destroy(npartner);
+  memory->sfree(partner);
+  memory->sfree(valuepartner);
+  npartner = NULL;
+  partner = NULL;
+  valuepartner = NULL;
+
+  maxpartner = 8;
+  grow_arrays(atom->nmax);
+
+  d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
+#ifndef KOKKOS_USE_CUDA_UVM
+  h_resize = Kokkos::create_mirror_view(d_resize);
+#else
+  h_resize = d_resize;
+#endif
+  h_resize() = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
+{
+  if (copymode) return;
+
+  memoryKK->destroy_kokkos(k_npartner, npartner);
+  memoryKK->destroy_kokkos(k_partner, partner);
+  memoryKK->destroy_kokkos(k_valuepartner, valuepartner);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::init()
+{
+  if (atomKK->tag_enable == 0)
+    error->all(FLERR,"Neighbor history requires atoms have IDs");
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
+{
+  copymode = 1;
+  
+  h_resize() = 1;
+  while (h_resize() > 0) {
+    FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
+    Kokkos::parallel_for(nlocal_neigh,zero);
+
+    h_resize() = 0;
+    deep_copy(d_resize, h_resize);
+
+    FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this);
+    Kokkos::parallel_for(nlocal_neigh,f);
+
+    deep_copy(h_resize, d_resize);
+    if (h_resize() > 0) {
+      maxpartner += 8;
+      memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner");
+      memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner");
+    }
+  }
+
+  copymode = 0;
+
+  comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const
+{
+  d_npartner[i] = 0;
+}
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
+{
+  const int i = d_ilist[ii];
+  const int jnum = d_numneigh[i];
+
+  for (int jj = 0; jj < jnum; jj++) {
+    if (d_firstflag(i,jj)) {
+      int j = d_neighbors(i,jj);
+      j &= NEIGHMASK;
+      int m = Kokkos::atomic_fetch_add(&d_npartner[i],1);
+      if (m < maxpartner) {
+	d_partner(i,m) = tag[j];
+	for (int k = 0; k < dnum; k++)
+	  d_valuepartner(i,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
+      } else {
+	d_resize() = 1;
+      }
+      if (j < nlocal_neigh) {
+	m = Kokkos::atomic_fetch_add(&d_npartner[j],1);
+	if (m < maxpartner) {
+	  d_partner(j,m) = tag[i];
+	  for (int k = 0; k < dnum; k++)
+	    d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
+	} else {
+	  d_resize() = 1;
+	}
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor()
+{
+  post_neighbor();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
+{
+  tag = atomKK->k_tag.view<DeviceType>();
+  
+  int inum = pair->list->inum;
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+
+  // store atom counts used for new neighbor list which was just built
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+  nlocal_neigh = nlocal;
+  nall_neigh = nall;
+
+  // realloc firstflag and firstvalue if needed
+
+  if (maxatom < nlocal || k_list->maxneighs > d_firstflag.extent(1)) {
+    maxatom = nall;
+    d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs);
+    d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
+  }
+
+  copymode = 1;
+  
+  FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
+  Kokkos::parallel_for(inum,f);
+
+  copymode = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
+{
+  const int i = d_ilist[ii];
+  const int jnum = d_numneigh[i];
+  const int np = d_npartner[i];
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i,jj);
+    const int rflag = j >> SBBITS & 3;
+    j &= NEIGHMASK;
+
+    int m;
+    if (rflag) {
+      int jtag = tag(j);
+      for (m = 0; m < np; m++)
+	if (d_partner(i, m) == jtag) break;
+      if (m < np) {
+	d_firstflag(i,jj) = 1;
+	for (int k = 0; k < dnum; k++) {
+	  d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k);
+	}
+      } else {
+	d_firstflag(i,jj) = 0;
+	for (int k = 0; k < dnum; k++) {
+	  d_firstvalue(i, dnum*jj+k) = 0;
+	}
+      }
+    } else {
+      d_firstflag(i,jj) = 0;
+      for (int k = 0; k < dnum; k++) {
+	d_firstvalue(i, dnum*jj+k) = 0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double FixNeighHistoryKokkos<DeviceType>::memory_usage()
+{
+  double bytes = d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int);
+  bytes += d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double);
+  bytes += 2*k_npartner.extent(0)*sizeof(int);
+  bytes += 2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int);
+  bytes += 2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double);
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   allocate fictitious charge arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
+{
+  k_npartner.template sync<LMPHostType>(); // force reallocation on host
+  k_partner.template sync<LMPHostType>();
+  k_valuepartner.template sync<LMPHostType>();
+  
+  memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner");
+  memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner");
+  memoryKK->grow_kokkos(k_valuepartner,valuepartner,nmax,dnum*maxpartner,"neighbor_history:valuepartner");
+
+  d_npartner = k_npartner.template view<DeviceType>();
+  d_partner = k_partner.template view<DeviceType>();
+  d_valuepartner = k_valuepartner.template view<DeviceType>();
+
+  k_npartner.template modify<LMPHostType>();
+  k_partner.template modify<LMPHostType>();
+  k_valuepartner.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   copy values within fictitious charge arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
+{
+  k_npartner.template sync<LMPHostType>();
+  k_partner.template sync<LMPHostType>();
+  k_valuepartner.template sync<LMPHostType>();
+
+  npartner[j] = npartner[i];
+  for (int m = 0; m < npartner[i]; m++) {
+    partner[j][m] = partner[i][m];
+    valuepartner[j][m] = valuepartner[i][m];
+  }
+
+  k_npartner.template modify<LMPHostType>();
+  k_partner.template modify<LMPHostType>();
+  k_valuepartner.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based array for exchange with another proc
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
+{
+  k_npartner.template sync<LMPHostType>();
+  k_partner.template sync<LMPHostType>();
+  k_valuepartner.template sync<LMPHostType>();
+
+  int n = 0;
+  buf[n++] = npartner[i];
+  for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m];
+  for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m];
+
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based array from exchange with another proc
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
+{
+  int n = 0;
+  npartner[nlocal] = static_cast<int>(buf[n++]);
+  for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]);
+  for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++];
+
+  k_npartner.template modify<LMPHostType>();
+  k_partner.template modify<LMPHostType>();
+  k_valuepartner.template modify<LMPHostType>();
+
+  return n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class FixNeighHistoryKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class FixNeighHistoryKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..d5bb1c3971d5dbea19cca627e0314ae1c4b5cb35
--- /dev/null
+++ b/src/KOKKOS/fix_neigh_history_kokkos.h
@@ -0,0 +1,107 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(NEIGH_HISTORY/KK,FixNeighHistoryKokkos<LMPDeviceType>)
+FixStyle(NEIGH_HISTORY/KK/DEVICE,FixNeighHistoryKokkos<LMPDeviceType>)
+FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_NEIGH_HISTORY_KOKKOS_H
+#define LMP_FIX_NEIGH_HISTORY_KOKKOS_H
+
+#include "fix_neigh_history.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+template <class DeviceType>
+class FixNeighHistoryKokkos : public FixNeighHistory {
+ public:
+  FixNeighHistoryKokkos(class LAMMPS *, int, char **);
+  ~FixNeighHistoryKokkos();
+
+  void init();
+  void pre_exchange();
+  void setup_post_neighbor();
+  virtual void post_neighbor();
+  double memory_usage();
+  void grow_arrays(int);
+  void copy_arrays(int, int, int);
+  int pack_exchange(int, double *);
+  int unpack_exchange(int, double *);
+
+  KOKKOS_INLINE_FUNCTION
+  void zero_partner_count_item(const int &i) const;
+  KOKKOS_INLINE_FUNCTION
+  void pre_exchange_item(const int &ii) const;
+  KOKKOS_INLINE_FUNCTION
+  void post_neighbor_item(const int &ii) const;
+
+  typename Kokkos::View<int**> d_firstflag;
+  typename Kokkos::View<LMP_FLOAT**> d_firstvalue;
+
+ private:
+  typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
+  typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner;
+  typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
+
+  // for neighbor list lookup
+  typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
+  
+  typename ArrayTypes<DeviceType>::t_tagint_1d tag;
+  typename ArrayTypes<DeviceType>::t_int_1d d_npartner;
+  typename ArrayTypes<DeviceType>::t_tagint_2d d_partner;
+  typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner; 
+
+  typename ArrayTypes<DeviceType>::t_int_scalar d_resize;
+  typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
+};
+
+template <class DeviceType>
+struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
+  FixNeighHistoryKokkos<DeviceType> c;
+  FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int &i) const {
+    c.zero_partner_count_item(i);
+  }
+};
+
+template <class DeviceType>
+struct FixNeighHistoryKokkosPreExchangeFunctor {
+  FixNeighHistoryKokkos<DeviceType> c;
+  FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const {
+    c.pre_exchange_item(i);
+  }
+};
+
+template <class DeviceType>
+struct FixNeighHistoryKokkosPostNeighborFunctor {
+  FixNeighHistoryKokkos<DeviceType> c;
+  FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const {
+    c.post_neighbor_item(i);
+  }
+};
+
+} // namespace LAMMPS_NS
+
+#endif // LMP_FIX_NEIGH_HISTORY_KOKKOS_H
+#endif // FIX_CLASS
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index 347177b7f454bc03abfd739e8d51f218065b24de..42d421e92ef85bfc1539c808e61d55f508283b9f 100644
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -537,7 +537,7 @@ void FixNHKokkos<DeviceType>::nve_v()
 
   v = atomKK->k_v.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
-  rmass = atomKK->rmass;
+  rmass = atomKK->k_rmass.view<DeviceType>();
   mass = atomKK->k_mass.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
@@ -545,7 +545,7 @@ void FixNHKokkos<DeviceType>::nve_v()
   if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
 
   copymode = 1;
-  if (rmass)
+  if (rmass.data())
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this);
   else
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this);
diff --git a/src/KOKKOS/fix_nh_kokkos.h b/src/KOKKOS/fix_nh_kokkos.h
index a625cdbc204c82c10e209f537115198b6b811d73..ae18bd6dbdfd075325055a3dd0b72057ede45ac4 100644
--- a/src/KOKKOS/fix_nh_kokkos.h
+++ b/src/KOKKOS/fix_nh_kokkos.h
@@ -71,8 +71,8 @@ class FixNHKokkos : public FixNH {
   typename ArrayTypes<DeviceType>::t_x_array x;
   typename ArrayTypes<DeviceType>::t_v_array v;
   typename ArrayTypes<DeviceType>::t_f_array_const f;
-  double *rmass;
-  typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
+  typename ArrayTypes<DeviceType>::t_float_1d rmass;
+  typename ArrayTypes<DeviceType>::t_float_1d mass;
   typename ArrayTypes<DeviceType>::t_int_1d type;
   typename ArrayTypes<DeviceType>::t_int_1d mask;
 };
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
index cb3eadcd9009f8e7a4426dae6a2bc0c459592111..621f16f4201c76a25162ec325e0ddae1d705f916 100644
--- a/src/KOKKOS/fix_nve_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -62,14 +62,14 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
   x = atomKK->k_x.view<DeviceType>();
   v = atomKK->k_v.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
-  rmass = atomKK->rmass;
+  rmass = atomKK->k_rmass.view<DeviceType>();
   mass = atomKK->k_mass.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
   int nlocal = atomKK->nlocal;
   if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
 
-  if (rmass) {
+  if (rmass.data()) {
     FixNVEKokkosInitialIntegrateFunctor<DeviceType,1> functor(this);
     Kokkos::parallel_for(nlocal,functor);
   } else {
@@ -118,14 +118,14 @@ void FixNVEKokkos<DeviceType>::final_integrate()
 
   v = atomKK->k_v.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
-  rmass = atomKK->rmass;
+  rmass = atomKK->k_rmass.view<DeviceType>();
   mass = atomKK->k_mass.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
   int nlocal = atomKK->nlocal;
   if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
 
-  if (rmass) {
+  if (rmass.data()) {
     FixNVEKokkosFinalIntegrateFunctor<DeviceType,1> functor(this);
     Kokkos::parallel_for(nlocal,functor);
   } else {
diff --git a/src/KOKKOS/fix_nve_kokkos.h b/src/KOKKOS/fix_nve_kokkos.h
index 578e0f46cc932efc4b6434b97612b64a64ef8533..e2230d86b622cabd5ff063987688ae3891d71410 100644
--- a/src/KOKKOS/fix_nve_kokkos.h
+++ b/src/KOKKOS/fix_nve_kokkos.h
@@ -60,8 +60,8 @@ class FixNVEKokkos : public FixNVE {
   typename ArrayTypes<DeviceType>::t_x_array x;
   typename ArrayTypes<DeviceType>::t_v_array v;
   typename ArrayTypes<DeviceType>::t_f_array_const f;
-  double *rmass;
-  typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
+  typename ArrayTypes<DeviceType>::t_float_1d rmass;
+  typename ArrayTypes<DeviceType>::t_float_1d mass;
   typename ArrayTypes<DeviceType>::t_int_1d type;
   typename ArrayTypes<DeviceType>::t_int_1d mask;
 };
diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.cpp b/src/KOKKOS/fix_nve_sphere_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d636f56b20747656a50b6f12ec529d4e37724ef0
--- /dev/null
+++ b/src/KOKKOS/fix_nve_sphere_kokkos.cpp
@@ -0,0 +1,155 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_nve_sphere_kokkos.h"
+#include "atom_masks.h"
+#include "atom_kokkos.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+enum{NONE,DIPOLE};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixNVESphere(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK;
+  datamask_modify = X_MASK | V_MASK | OMEGA_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVESphereKokkos<DeviceType>::cleanup_copy()
+{
+  id = style = NULL;
+  vatom = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVESphereKokkos<DeviceType>::init()
+{
+  FixNVESphere::init();
+
+  if (extra == DIPOLE) {
+    error->all(FLERR,"Fix nve/sphere/kk doesn't yet support dipole");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVESphereKokkos<DeviceType>::initial_integrate(int vflag)
+{
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+
+  x = atomKK->k_x.view<DeviceType>();
+  v = atomKK->k_v.view<DeviceType>();
+  omega = atomKK->k_omega.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  rmass = atomKK->k_rmass.view<DeviceType>();
+  radius = atomKK->k_radius.view<DeviceType>();
+
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+
+  FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this); 
+  Kokkos::parallel_for(nlocal,f);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const
+{
+  const double dtfrotate = dtf / inertia;
+
+  if (mask(i) & groupbit) {
+    const double dtfm = dtf / rmass(i);
+    v(i,0) += dtfm * f(i,0);
+    v(i,1) += dtfm * f(i,1);
+    v(i,2) += dtfm * f(i,2);
+    x(i,0) += dtv * v(i,0);
+    x(i,1) += dtv * v(i,1);
+    x(i,2) += dtv * v(i,2);
+      
+    const double dtirotate = dtfrotate / (radius(i)*radius(i)*rmass(i));
+    omega(i,0) += dtirotate * torque(i,0);
+    omega(i,1) += dtirotate * torque(i,1);
+    omega(i,2) += dtirotate * torque(i,2);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVESphereKokkos<DeviceType>::final_integrate()
+{
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+
+  v = atomKK->k_v.view<DeviceType>();
+  omega = atomKK->k_omega.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  rmass = atomKK->k_rmass.view<DeviceType>();
+  radius = atomKK->k_radius.view<DeviceType>();
+
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+
+  FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this);
+  Kokkos::parallel_for(nlocal,f);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVESphereKokkos<DeviceType>::final_integrate_item(const int i) const
+{
+  const double dtfrotate = dtf / inertia;
+
+  if (mask(i) & groupbit) {
+    const double dtfm = dtf / rmass(i);
+    v(i,0) += dtfm * f(i,0);
+    v(i,1) += dtfm * f(i,1);
+    v(i,2) += dtfm * f(i,2);
+      
+    const double dtirotate = dtfrotate / (radius(i)*radius(i)*rmass(i));
+    omega(i,0) += dtirotate * torque(i,0);
+    omega(i,1) += dtirotate * torque(i,1);
+    omega(i,2) += dtirotate * torque(i,2);
+  }
+}
+
+namespace LAMMPS_NS {
+template class FixNVESphereKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class FixNVESphereKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.h b/src/KOKKOS/fix_nve_sphere_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..382e530319f85ec52945dc90ea25bf47fe7f1b7f
--- /dev/null
+++ b/src/KOKKOS/fix_nve_sphere_kokkos.h
@@ -0,0 +1,79 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(nve/sphere/kk,FixNVESphereKokkos<LMPDeviceType>)
+FixStyle(nve/sphere/kk/device,FixNVESphereKokkos<LMPDeviceType>)
+FixStyle(nve/sphere/kk/host,FixNVESphereKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_NVE_SPHERE_KOKKOS_H
+#define LMP_FIX_NVE_SPHERE_KOKKOS_H
+
+#include "fix_nve_sphere.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+  
+template<class DeviceType>
+class FixNVESphereKokkos : public FixNVESphere {
+  public:
+    FixNVESphereKokkos(class LAMMPS *, int, char **);
+    virtual ~FixNVESphereKokkos() {}
+    void cleanup_copy();
+    void init();
+    void initial_integrate(int);
+    void final_integrate();
+  
+    KOKKOS_INLINE_FUNCTION
+    void initial_integrate_item(const int i) const;
+    KOKKOS_INLINE_FUNCTION
+    void final_integrate_item(const int i) const;
+
+  private:
+    typename ArrayTypes<DeviceType>::t_x_array x;
+    typename ArrayTypes<DeviceType>::t_v_array v;
+    typename ArrayTypes<DeviceType>::t_v_array omega;
+    typename ArrayTypes<DeviceType>::t_f_array f;
+    typename ArrayTypes<DeviceType>::t_f_array torque;
+    typename ArrayTypes<DeviceType>::t_float_1d rmass;
+    typename ArrayTypes<DeviceType>::t_float_1d radius;
+    typename ArrayTypes<DeviceType>::t_int_1d mask;
+};
+
+template <class DeviceType>
+struct FixNVESphereKokkosInitialIntegrateFunctor {
+  FixNVESphereKokkos<DeviceType> c;
+  FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    c.initial_integrate_item(i);
+  }
+};
+
+template <class DeviceType>
+struct FixNVESphereKokkosFinalIntegrateFunctor {
+  FixNVESphereKokkos<DeviceType> c;
+  FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); }
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    c.final_integrate_item(i);
+  }
+};
+
+} // namespace LAMMPS_NS
+
+#endif // LMP_FIX_NVE_SPHERE_KOKKOS_H
+#endif // FIX_CLASS
diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp
index 4411627a784f01329cb35da5afbb9457597d45e1..6f75e6cabcb05dea636941733b240c5cf9203d1e 100644
--- a/src/KOKKOS/npair_kokkos.cpp
+++ b/src/KOKKOS/npair_kokkos.cpp
@@ -24,8 +24,8 @@ namespace LAMMPS_NS {
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
-NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
+NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
 
 }
 
@@ -33,8 +33,8 @@ NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(l
    copy needed info from Neighbor class to this build class
    ------------------------------------------------------------------------- */
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
-void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info()
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
+void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_neighbor_info()
 {
   NPair::copy_neighbor_info();
 
@@ -63,8 +63,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info()
  copy per-atom and per-bin vectors from NBin class to this build class
  ------------------------------------------------------------------------- */
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
-void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info()
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
+void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_bin_info()
 {
   NPair::copy_bin_info();
 
@@ -80,8 +80,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info()
  copy needed info from NStencil class to this build class
  ------------------------------------------------------------------------- */
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
-void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
+void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_stencil_info()
 {
   NPair::copy_stencil_info();
 
@@ -110,8 +110,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
-void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
+void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
 {
   NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
   const int nlocal = includegroup?atom->nfirst:atom->nlocal;
@@ -131,6 +131,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
          k_stencilxyz.view<DeviceType>(),
          nlocal,
          atomKK->k_x.view<DeviceType>(),
+         atomKK->k_radius.view<DeviceType>(),
          atomKK->k_type.view<DeviceType>(),
          atomKK->k_mask.view<DeviceType>(),
          atomKK->k_molecule.view<DeviceType>(),
@@ -155,7 +156,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
          k_ex_mol_intra.view<DeviceType>(),
          bboxhi,bboxlo,
          domain->xperiodic,domain->yperiodic,domain->zperiodic,
-         domain->xprd_half,domain->yprd_half,domain->zprd_half);
+         domain->xprd_half,domain->yprd_half,domain->zprd_half,
+	 skin);
 
   k_cutneighsq.sync<DeviceType>();
   k_ex1_type.sync<DeviceType>();
@@ -171,7 +173,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
   k_bincount.sync<DeviceType>();
   k_bins.sync<DeviceType>();
   k_atom2bin.sync<DeviceType>();
-  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
+  atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
 
   data.special_flag[0] = special_flag[0];
   data.special_flag[1] = special_flag[1];
@@ -198,25 +200,49 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
       Kokkos::parallel_for(nall, f);
     } else {
       if (newton_pair) {
-        NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+	if (SIZE) {
+	  NPairKokkosBuildFunctorSize<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
 #ifdef KOKKOS_HAVE_CUDA
-        if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
-          Kokkos::parallel_for(config, f);
-        else
-          Kokkos::parallel_for(nall, f);
+	  if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
+	    Kokkos::parallel_for(config, f);
+	  else
+	    Kokkos::parallel_for(nall, f);
 #else
-        Kokkos::parallel_for(nall, f);
+	  Kokkos::parallel_for(nall, f);
 #endif
+	} else {
+	  NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+#ifdef KOKKOS_HAVE_CUDA
+	  if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
+	    Kokkos::parallel_for(config, f);
+	  else
+	    Kokkos::parallel_for(nall, f);
+#else
+	  Kokkos::parallel_for(nall, f);
+#endif
+	}
       } else {
-        NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+	if (SIZE) {
+	  NPairKokkosBuildFunctorSize<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
 #ifdef KOKKOS_HAVE_CUDA
-        if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
-          Kokkos::parallel_for(config, f);
-        else
-          Kokkos::parallel_for(nall, f);
+	  if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
+	    Kokkos::parallel_for(config, f);
+	  else
+	    Kokkos::parallel_for(nall, f);
 #else
-        Kokkos::parallel_for(nall, f);
+	  Kokkos::parallel_for(nall, f);
 #endif
+	} else {
+	  NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+#ifdef KOKKOS_HAVE_CUDA
+	  if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
+	    Kokkos::parallel_for(config, f);
+	  else
+	    Kokkos::parallel_for(nall, f);
+#else
+	  Kokkos::parallel_for(nall, f);
+#endif
+	}
       }
     }
     deep_copy(data.h_resize, data.resize);
@@ -774,19 +800,306 @@ void NeighborKokkosExecute<DeviceType>::
   neigh_list.d_ilist(i) = i;
 }
 
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
+void NeighborKokkosExecute<DeviceType>::
+   build_ItemSize(const int &i) const
+{
+  /* if necessary, goto next page and add pages */
+  int n = 0;
+
+  // get subview of neighbors of i
+
+  const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+  const X_FLOAT radi = radius(i);
+  const int itype = type(i);
+
+  const int ibin = c_atom2bin(i);
+
+  const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
+    = d_stencil;
+
+  const int mask_history = 3 << SBBITS;
+
+  // loop over all bins in neighborhood (includes ibin)
+  if(HalfNeigh)
+  for(int m = 0; m < c_bincount(ibin); m++) {
+    const int j = c_bins(ibin,m);
+    const int jtype = type(j);
+
+    //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
+    if((j == i) || (HalfNeigh && !Newton && (j < i))  ||
+        (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
+                                       ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
+                                        (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
+      ) continue;
+    if(exclude && exclusion(i,j,itype,jtype)) continue;
+
+    const X_FLOAT delx = xtmp - x(j, 0);
+    const X_FLOAT dely = ytmp - x(j, 1);
+    const X_FLOAT delz = ztmp - x(j, 2);
+    const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+    const X_FLOAT radsum = radi + radius(j);
+    const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
+
+    if(rsq <= cutsq) {
+      if(n<neigh_list.maxneighs) {
+	if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+	else neighbors_i(n++) = j;
+      }
+      else n++;
+    }
+  }
+
+  for(int k = 0; k < nstencil; k++) {
+    const int jbin = ibin + stencil[k];
+
+    // get subview of jbin
+    if(HalfNeigh && (ibin==jbin)) continue;
+    //const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
+    for(int m = 0; m < c_bincount(jbin); m++) {
+
+      const int j = c_bins(jbin,m);
+      const int jtype = type(j);
+
+      if(HalfNeigh && !Newton && (j < i)) continue;
+      if(!HalfNeigh && j==i) continue;
+      if(Tri) {
+	if (x(j,2) < ztmp) continue;
+	if (x(j,2) == ztmp) {
+	  if (x(j,1) < ytmp) continue;
+	  if (x(j,1) == ytmp) {
+	    if (x(j,0) < xtmp) continue;
+	    if (x(j,0) == xtmp && j <= i) continue;
+	  }
+	}
+      }
+      if(exclude && exclusion(i,j,itype,jtype)) continue;
+
+      const X_FLOAT delx = xtmp - x(j, 0);
+      const X_FLOAT dely = ytmp - x(j, 1);
+      const X_FLOAT delz = ztmp - x(j, 2);
+      const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+      const X_FLOAT radsum = radi + radius(j);
+      const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
+
+      if(rsq <= cutsq) {
+	if(n<neigh_list.maxneighs) {
+	  if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+	  else neighbors_i(n++) = j;
+	}
+	else n++;
+      }
+    }
+  }
+
+  neigh_list.d_numneigh(i) = n;
+
+  if(n > neigh_list.maxneighs) {
+    resize() = 1;
+
+    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+  }
+
+  neigh_list.d_ilist(i) = i;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef KOKKOS_HAVE_CUDA
+template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
+__device__ inline
+void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
+{
+  /* loop over atoms in i's bin,
+   */
+  const int atoms_per_bin = c_bins.extent(1);
+  const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
+  const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
+  const int MY_BIN = dev.team_rank()/atoms_per_bin;
+
+  const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
+
+  if(ibin >=c_bincount.extent(0)) return;
+  X_FLOAT* other_x = sharedmem;
+  other_x = other_x + 6*atoms_per_bin*MY_BIN;
+
+  int* other_id = (int*) &other_x[5 * atoms_per_bin];
+
+  int bincount_current = c_bincount[ibin];
+
+  for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
+    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
+    const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
+    /* if necessary, goto next page and add pages */
+
+    int n = 0;
+
+    X_FLOAT xtmp;
+    X_FLOAT ytmp;
+    X_FLOAT ztmp;
+    X_FLOAT radi;
+    int itype;
+    const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
+    const int mask_history = 3 << SBBITS;
+
+    if(i >= 0) {
+      xtmp = x(i, 0);
+      ytmp = x(i, 1);
+      ztmp = x(i, 2);
+      radi = radius(i);
+      itype = type(i);
+      other_x[MY_II] = xtmp;
+      other_x[MY_II + atoms_per_bin] = ytmp;
+      other_x[MY_II + 2 * atoms_per_bin] = ztmp;
+      other_x[MY_II + 3 * atoms_per_bin] = itype;
+      other_x[MY_II + 4 * atoms_per_bin] = radi;
+    }
+    other_id[MY_II] = i;
+    int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
+
+    if(test) return;
+
+    if(i >= 0 && i < nlocal) {
+      #pragma unroll 4
+      for(int m = 0; m < bincount_current; m++) {
+	int j = other_id[m];
+	const int jtype = other_x[m + 3 * atoms_per_bin];
+
+	//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
+	if((j == i) ||
+	   (HalfNeigh && !Newton && (j < i))  ||
+	   (HalfNeigh && Newton &&
+            ((j < i) ||
+	     ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
+				(x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
+	   ) continue;
+        if(Tri) {
+          if (x(j,2) < ztmp) continue;
+          if (x(j,2) == ztmp) {
+            if (x(j,1) < ytmp) continue;
+            if (x(j,1) == ytmp) {
+              if (x(j,0) < xtmp) continue;
+              if (x(j,0) == xtmp && j <= i) continue;
+            }
+          }
+        }
+	if(exclude && exclusion(i,j,itype,jtype)) continue;
+	const X_FLOAT delx = xtmp - other_x[m];
+	const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
+	const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
+	const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+	const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
+	const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
+
+	if(rsq <= cutsq) {
+	  if(n<neigh_list.maxneighs) {
+	    if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+	    else neighbors_i(n++) = j;
+	  }
+	  else n++;
+	}
+      }
+    }
+    __syncthreads();
+
+    const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
+      = d_stencil;
+    for(int k = 0; k < nstencil; k++) {
+      const int jbin = ibin + stencil[k];
+
+      if(ibin == jbin) continue;
+
+      bincount_current = c_bincount[jbin];
+      int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
+
+      if(j >= 0) {
+	other_x[MY_II] = x(j, 0);
+	other_x[MY_II + atoms_per_bin] = x(j, 1);
+	other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
+	other_x[MY_II + 3 * atoms_per_bin] = type(j);
+	other_x[MY_II + 4 * atoms_per_bin] = radius(j);
+      }
+
+      other_id[MY_II] = j;
+
+      __syncthreads();
+
+      if(i >= 0 && i < nlocal) {
+        #pragma unroll 8
+	for(int m = 0; m < bincount_current; m++) {
+	  const int j = other_id[m];
+	  const int jtype = other_x[m + 3 * atoms_per_bin];
+
+	  if(HalfNeigh && (j < i))  continue;
+	  if(HalfNeigh && !Newton && (j < i)) continue;
+	  if(!HalfNeigh && j==i) continue;
+	  if(Tri) {
+	    if (x(j,2) < ztmp) continue;
+	    if (x(j,2) == ztmp) {
+	      if (x(j,1) < ytmp) continue;
+	      if (x(j,1) == ytmp) {
+		if (x(j,0) < xtmp) continue;
+		if (x(j,0) == xtmp && j <= i) continue;
+	      }
+	    }
+	  }
+	  if(exclude && exclusion(i,j,itype,jtype)) continue;
+
+	  const X_FLOAT delx = xtmp - other_x[m];
+	  const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
+	  const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
+	  const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+	  const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
+	  const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
+
+	  if(rsq <= cutsq) {
+	    if(n<neigh_list.maxneighs) {
+	      if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+	      else neighbors_i(n++) = j;
+	    }
+	    else n++;
+	  }
+	}
+      }
+      __syncthreads();
+    }
+
+    if(i >= 0 && i < nlocal) {
+      neigh_list.d_numneigh(i) = n;
+      neigh_list.d_ilist(i) = i;
+    }
+
+    if(n > neigh_list.maxneighs) {
+      resize() = 1;
+
+      if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+    }
+  }
+}
+#endif
+
 }
 
 namespace LAMMPS_NS {
-template class NPairKokkos<LMPDeviceType,0,0,0>;
-template class NPairKokkos<LMPDeviceType,0,1,0>;
-template class NPairKokkos<LMPDeviceType,1,0,0>;
-template class NPairKokkos<LMPDeviceType,1,1,0>;
-template class NPairKokkos<LMPDeviceType,1,0,1>;
+template class NPairKokkos<LMPDeviceType,0,0,0,0>;
+template class NPairKokkos<LMPDeviceType,0,1,0,0>;
+template class NPairKokkos<LMPDeviceType,1,0,0,0>;
+template class NPairKokkos<LMPDeviceType,1,1,0,0>;
+template class NPairKokkos<LMPDeviceType,1,0,1,0>;
+template class NPairKokkos<LMPDeviceType,1,0,0,1>;
+template class NPairKokkos<LMPDeviceType,1,0,1,1>;
 #ifdef KOKKOS_HAVE_CUDA
-template class NPairKokkos<LMPHostType,0,0,0>;
-template class NPairKokkos<LMPHostType,0,1,0>;
-template class NPairKokkos<LMPHostType,1,0,0>;
-template class NPairKokkos<LMPHostType,1,1,0>;
-template class NPairKokkos<LMPHostType,1,0,1>;
+template class NPairKokkos<LMPHostType,0,0,0,0>;
+template class NPairKokkos<LMPHostType,0,1,0,0>;
+template class NPairKokkos<LMPHostType,1,0,0,0>;
+template class NPairKokkos<LMPHostType,1,1,0,0>;
+template class NPairKokkos<LMPHostType,1,0,1,0>;
+template class NPairKokkos<LMPHostType,1,0,0,1>;
+template class NPairKokkos<LMPHostType,1,0,1,1>;
 #endif
 }
diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h
index 6c1c0e958b49dcd09ff7bb6b32efca67b2d3f6a6..970e40c9fca0f2a54f2c6a342d5ac994b9d70fbc 100644
--- a/src/KOKKOS/npair_kokkos.h
+++ b/src/KOKKOS/npair_kokkos.h
@@ -13,56 +13,76 @@
 
 #ifdef NPAIR_CLASS
 
-typedef NPairKokkos<LMPHostType,0,0,0> NPairKokkosFullBinHost;
+typedef NPairKokkos<LMPHostType,0,0,0,0> NPairKokkosFullBinHost;
 NPairStyle(full/bin/kk/host,
            NPairKokkosFullBinHost,
            NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
 
-typedef NPairKokkos<LMPDeviceType,0,0,0> NPairKokkosFullBinDevice;
+typedef NPairKokkos<LMPDeviceType,0,0,0,0> NPairKokkosFullBinDevice;
 NPairStyle(full/bin/kk/device,
            NPairKokkosFullBinDevice,
            NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
 
-typedef NPairKokkos<LMPHostType,0,1,0> NPairKokkosFullBinGhostHost;
+typedef NPairKokkos<LMPHostType,0,1,0,0> NPairKokkosFullBinGhostHost;
 NPairStyle(full/bin/ghost/kk/host,
            NPairKokkosFullBinGhostHost,
            NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
 
-typedef NPairKokkos<LMPDeviceType,0,1,0> NPairKokkosFullBinGhostDevice;
+typedef NPairKokkos<LMPDeviceType,0,1,0,0> NPairKokkosFullBinGhostDevice;
 NPairStyle(full/bin/ghost/kk/device,
            NPairKokkosFullBinGhostDevice,
            NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
 
-typedef NPairKokkos<LMPHostType,1,0,0> NPairKokkosHalfBinHost;
+typedef NPairKokkos<LMPHostType,1,0,0,0> NPairKokkosHalfBinHost;
 NPairStyle(half/bin/kk/host,
            NPairKokkosHalfBinHost,
            NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
 
-typedef NPairKokkos<LMPDeviceType,1,0,0> NPairKokkosHalfBinDevice;
+typedef NPairKokkos<LMPDeviceType,1,0,0,0> NPairKokkosHalfBinDevice;
 NPairStyle(half/bin/kk/device,
            NPairKokkosHalfBinDevice,
            NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
 
-typedef NPairKokkos<LMPHostType,1,0,1> NPairKokkosHalfBinHostTri;
+typedef NPairKokkos<LMPHostType,1,0,1,0> NPairKokkosHalfBinHostTri;
 NPairStyle(half/bin/kk/host,
            NPairKokkosHalfBinHostTri,
            NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI)
 
-typedef NPairKokkos<LMPDeviceType,1,0,1> NPairKokkosHalfBinDeviceTri;
+typedef NPairKokkos<LMPDeviceType,1,0,1,0> NPairKokkosHalfBinDeviceTri;
 NPairStyle(half/bin/kk/device,
            NPairKokkosHalfBinDeviceTri,
            NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI)
 
-typedef NPairKokkos<LMPHostType,1,1,0> NPairKokkosHalfBinGhostHost;
+typedef NPairKokkos<LMPHostType,1,1,0,0> NPairKokkosHalfBinGhostHost;
 NPairStyle(half/bin/ghost/kk/host,
            NPairKokkosHalfBinGhostHost,
            NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
 
-typedef NPairKokkos<LMPDeviceType,1,1,0> NPairKokkosHalfBinGhostDevice;
+typedef NPairKokkos<LMPDeviceType,1,1,0,0> NPairKokkosHalfBinGhostDevice;
 NPairStyle(half/bin/ghost/kk/device,
            NPairKokkosHalfBinGhostDevice,
            NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
 
+typedef NPairKokkos<LMPHostType,1,0,0,1> NPairKokkosHalfSizeBinHost;
+NPairStyle(half/size/bin/kk/host,
+           NPairKokkosHalfSizeBinHost,
+           NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
+
+typedef NPairKokkos<LMPDeviceType,1,0,0,1> NPairKokkosHalfSizeBinDevice;
+NPairStyle(half/size/bin/kk/device,
+           NPairKokkosHalfSizeBinDevice,
+           NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
+
+typedef NPairKokkos<LMPHostType,1,0,1,1> NPairKokkosHalfSizeBinHostTri;
+NPairStyle(half/size/bin/kk/host,
+           NPairKokkosHalfSizeBinHostTri,
+           NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI)
+
+typedef NPairKokkos<LMPDeviceType,1,0,1,1> NPairKokkosHalfSizeBinDeviceTri;
+NPairStyle(half/size/bin/kk/device,
+           NPairKokkosHalfSizeBinDeviceTri,
+           NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI)
+
 #else
 
 #ifndef LMP_NPAIR_KOKKOS_H
@@ -73,7 +93,7 @@ NPairStyle(half/bin/ghost/kk/device,
 
 namespace LAMMPS_NS {
 
-template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
+template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
 class NPairKokkos : public NPair {
  public:
   NPairKokkos(class LAMMPS *);
@@ -162,6 +182,7 @@ class NeighborKokkosExecute
   // data from Atom class
 
   const typename AT::t_x_array_randomread x;
+  const typename AT::t_float_1d radius;
   const typename AT::t_int_1d_const type,mask;
   const typename AT::t_tagint_1d_const molecule;
   const typename AT::t_tagint_1d_const tag;
@@ -188,6 +209,9 @@ class NeighborKokkosExecute
   const int xperiodic, yperiodic, zperiodic;
   const int xprd_half, yprd_half, zprd_half;
 
+  // GRANULAR required member variables
+  const X_FLOAT skin;
+
   NeighborKokkosExecute(
                         const NeighListKokkos<DeviceType> &_neigh_list,
                         const typename AT::t_xfloat_2d_randomread &_cutneighsq,
@@ -199,6 +223,7 @@ class NeighborKokkosExecute
                         const typename AT::t_int_1d_3 &_d_stencilxyz,
                         const int _nlocal,
                         const typename AT::t_x_array_randomread &_x,
+                        const typename AT::t_float_1d &_radius,
                         const typename AT::t_int_1d_const &_type,
                         const typename AT::t_int_1d_const &_mask,
                         const typename AT::t_tagint_1d_const &_molecule,
@@ -225,13 +250,14 @@ class NeighborKokkosExecute
                         const typename AT::t_int_1d_const & _ex_mol_intra,
                         const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
                         const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
-                        const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
+                        const int & _xprd_half, const int & _yprd_half, const int & _zprd_half,
+                        const X_FLOAT _skin):
     neigh_list(_neigh_list), cutneighsq(_cutneighsq),
     bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
     atom2bin(_atom2bin),c_atom2bin(_atom2bin),
     nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
     nlocal(_nlocal),
-    x(_x),type(_type),mask(_mask),molecule(_molecule),
+    x(_x),radius(_radius),type(_type),mask(_mask),molecule(_molecule),
     tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
     nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
     mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
@@ -245,7 +271,8 @@ class NeighborKokkosExecute
     ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
     ex_mol_intra(_ex_mol_intra),
     xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
-    xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
+    xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half),
+    skin(_skin) {
 
     if (molecular == 2) moltemplate = 1;
     else moltemplate = 0;
@@ -280,10 +307,18 @@ class NeighborKokkosExecute
   KOKKOS_FUNCTION
   void build_Item_Ghost(const int &i) const;
 
+  template<int HalfNeigh, int Newton, int Tri>
+  KOKKOS_FUNCTION
+  void build_ItemSize(const int &i) const;
+
 #ifdef KOKKOS_HAVE_CUDA
   template<int HalfNeigh, int Newton, int Tri>
   __device__ inline
   void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
+
+  template<int HalfNeigh, int Newton, int Tri>
+  __device__ inline
+  void build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
 #endif
 
   KOKKOS_INLINE_FUNCTION
@@ -399,6 +434,48 @@ struct NPairKokkosBuildFunctorGhost {
   }
 };
 
+template <class DeviceType, int HALF_NEIGH, int GHOST_NEWTON, int TRI>
+struct NPairKokkosBuildFunctorSize {
+  typedef DeviceType device_type;
+
+  const NeighborKokkosExecute<DeviceType> c;
+  const size_t sharedsize;
+
+  NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<DeviceType> &_c,
+			      const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int & i) const {
+    c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
+  }
+
+#ifdef KOKKOS_HAVE_CUDA
+  __device__ inline
+  void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
+    c.template build_ItemSizeCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev);
+  }
+  size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
+#endif
+};
+
+template <int HALF_NEIGH, int GHOST_NEWTON, int TRI>
+struct NPairKokkosBuildFunctorSize<LMPHostType,HALF_NEIGH,GHOST_NEWTON,TRI> {
+  typedef LMPHostType device_type;
+
+  const NeighborKokkosExecute<LMPHostType> c;
+  const size_t sharedsize;
+
+  NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<LMPHostType> &_c,
+			      const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int & i) const {
+    c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
+  }
+
+  void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {} // Should error out
+};
+
 }
 
 #endif
diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
index 038d95394ac6947036e44e39c0f684251e5ddd31..3282c9da1e24057e8b8169faf8e525a035cf25f5 100644
--- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
@@ -186,7 +186,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   f = atomKK->k_f.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mass = atomKK->k_mass.view<DeviceType>();
-  rmass = atomKK->rmass;
+  rmass = atomKK->k_rmass.view<DeviceType>();
   dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
 
   k_cutsq.template sync<DeviceType>();
@@ -564,7 +564,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNo
         a_f(j,2) -= delz*fpair;
       }
 
-      if (rmass) {
+      if (rmass.data()) {
         mass_i = rmass[i];
         mass_j = rmass[j];
       } else {
diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
index 12ffb668e200a01e51555376c78f235ee0739358..9d316df1524faad52fce1d96c8cadedfe553248d 100644
--- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
+++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h
@@ -132,7 +132,7 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy {
   typename ArrayTypes<DeviceType>::t_f_array f;
   typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
   typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
-  double *rmass;
+  typename ArrayTypes<DeviceType>::t_float_1d rmass;
   typename AT::t_efloat_1d dpdTheta;
   typename AT::t_efloat_1d d_duCond,d_duMech;
   HAT::t_efloat_1d h_duCond,h_duMech;
diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd8beed583a4f29e3990a5723bed0e17cb243bca
--- /dev/null
+++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp
@@ -0,0 +1,587 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "pair_gran_hooke_history_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "memory_kokkos.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "error.h"
+#include "modify.h"
+#include "fix_neigh_history_kokkos.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairGranHookeHistoryKokkos<DeviceType>::PairGranHookeHistoryKokkos(LAMMPS *lmp) : PairGranHookeHistory(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | V_MASK | OMEGA_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | MASK_MASK | ENERGY_MASK | VIRIAL_MASK | RMASS_MASK | RADIUS_MASK;
+  datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairGranHookeHistoryKokkos<DeviceType>::~PairGranHookeHistoryKokkos()
+{
+  if (copymode) return;
+
+  if (allocated) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    eatom = NULL;
+    vatom = NULL;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairGranHookeHistoryKokkos<DeviceType>::init_style()
+{
+  if (history && fix_history == NULL) {
+    char dnumstr[16];
+    sprintf(dnumstr,"%d",3);
+    char **fixarg = new char*[4];
+    fixarg[0] = (char *) "NEIGH_HISTORY";
+    fixarg[1] = (char *) "all";
+    if (execution_space == Device)
+      fixarg[2] = (char *) "NEIGH_HISTORY/KK/DEVICE";
+    else
+      fixarg[2] = (char *) "NEIGH_HISTORY/KK/HOST";
+    fixarg[3] = dnumstr;
+    modify->add_fix(4,fixarg,1);
+    delete [] fixarg;
+    fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1];
+    fix_history->pair = this;
+    fix_historyKK = (FixNeighHistoryKokkos<DeviceType> *)fix_history;
+  }
+
+  PairGranHookeHistory::init_style();
+
+  // irequest = neigh request made by parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;
+
+  neighbor->requests[irequest]->
+    kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
+    !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  neighbor->requests[irequest]->
+    kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+
+  if (neighflag == HALF || neighflag == HALFTHREAD) {
+    neighbor->requests[irequest]->full = 0;
+    neighbor->requests[irequest]->half = 1;
+  } else {
+    error->all(FLERR,"Cannot use chosen neighbor list style with gran/hooke/history/kk");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  copymode = 1;
+
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag,0);
+  else evflag = vflag_fdotr = 0;
+
+  int shearupdate = 1;
+  if (update->setupflag) shearupdate = 0;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK | TORQUE_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  v = atomKK->k_v.view<DeviceType>();
+  omega = atomKK->k_omega.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  torque = atomKK->k_torque.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  rmass = atomKK->k_rmass.view<DeviceType>();
+  radius = atomKK->k_radius.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+  special_lj[0] = force->special_lj[0];
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+
+  int inum = list->inum;
+  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+  d_ilist = k_list->d_ilist;
+
+  if (d_numneigh.extent(0) != d_numneigh_touch.extent(0))
+    d_numneigh_touch = typename AT::t_int_1d("pair:numneigh_touch",d_numneigh.extent(0));
+  if (d_neighbors.extent(0) != d_neighbors_touch.extent(0) ||
+      d_neighbors.extent(1) != d_neighbors_touch.extent(1))
+    d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1));
+
+  d_firsttouch = fix_historyKK->d_firstflag;
+  d_firstshear = fix_historyKK->d_firstvalue;
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this);
+
+  EV_FLOAT ev;
+
+  if (lmp->kokkos->neighflag == HALF) {
+    if (force->newton_pair) {
+      if (vflag_atom) {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,2,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,2,0>>(0,inum),*this);
+	}
+      } else if (vflag_global) {
+	if (shearupdate) {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,1>>(0,inum),*this, ev);
+	} else {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,0>>(0,inum),*this, ev);
+	}
+      } else {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,0>>(0,inum),*this);
+	}
+      }
+    } else {
+      if (vflag_atom) {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,2,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,2,0>>(0,inum),*this);
+	}
+      } else if (vflag_global) {
+	if (shearupdate) {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,1>>(0,inum),*this, ev);
+	} else {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,0>>(0,inum),*this, ev);
+	}
+      } else {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,0>>(0,inum),*this);
+	}
+      }
+    }
+  } else { // HALFTHREAD
+    if (force->newton_pair) {
+      if (vflag_atom) {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,2,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,2,0>>(0,inum),*this);
+	}
+      } else if (vflag_global) {
+	if (shearupdate) {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,1>>(0,inum),*this, ev);
+	} else {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,0>>(0,inum),*this, ev);
+	}
+      } else {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,0>>(0,inum),*this);
+	}
+      }
+    } else {
+      if (vflag_atom) {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,2,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,2,0>>(0,inum),*this);
+	}
+      } else if (vflag_global) {
+	if (shearupdate) {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,1>>(0,inum),*this, ev);
+	} else {
+	  Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,0>>(0,inum),*this, ev);
+	}
+      } else {
+	if (shearupdate) {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,1>>(0,inum),*this);
+	} else {
+	  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,0>>(0,inum),*this);
+	}
+      }
+    }
+  }
+
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const {
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const LMP_FLOAT imass = rmass[i];
+  const LMP_FLOAT irad = radius[i];
+  const int jnum = d_numneigh[i];
+  int count = 0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    const int j = d_neighbors(i,jj) & NEIGHMASK;
+
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const LMP_FLOAT jmass = rmass[j];
+    const LMP_FLOAT jrad = radius[j];
+    const LMP_FLOAT radsum = irad + jrad;
+
+    // check for touching neighbors
+
+    if (rsq >= radsum * radsum) {
+      d_firsttouch(i,jj) = 0;
+      d_firstshear(i,3*jj) = 0;
+      d_firstshear(i,3*jj+1) = 0;
+      d_firstshear(i,3*jj+2) = 0;
+    } else {
+      d_firsttouch(i,jj) = 1;
+      d_neighbors_touch(i,count++) = jj;
+    }
+  }
+  d_numneigh_touch[i] = count;
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
+KOKKOS_INLINE_FUNCTION
+void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int ii, EV_FLOAT &ev) const {
+
+  // The f and torque arrays are atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_torque = torque;
+
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const LMP_FLOAT imass = rmass[i];
+  const LMP_FLOAT irad = radius[i];
+  const int jnum = d_numneigh_touch[i];
+  
+  F_FLOAT fx_i = 0.0;
+  F_FLOAT fy_i = 0.0;
+  F_FLOAT fz_i = 0.0;
+
+  F_FLOAT torquex_i = 0.0;
+  F_FLOAT torquey_i = 0.0;
+  F_FLOAT torquez_i = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    const int m = d_neighbors_touch(i, jj);
+    const int j = d_neighbors(i, m) & NEIGHMASK;
+    
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const LMP_FLOAT jmass = rmass[j];
+    const LMP_FLOAT jrad = radius[j];
+    const LMP_FLOAT radsum = irad + jrad;
+
+    // check for touching neighbors
+
+    const LMP_FLOAT r = sqrt(rsq);
+    const LMP_FLOAT rinv = 1.0/r;
+    const LMP_FLOAT rsqinv = 1/rsq;
+
+    // relative translational velocity
+
+    V_FLOAT vr1 = v(i,0) - v(j,0);
+    V_FLOAT vr2 = v(i,1) - v(j,1);
+    V_FLOAT vr3 = v(i,2) - v(j,2);
+      
+    // normal component
+
+    V_FLOAT vnnr = vr1*delx + vr2*dely + vr3*delz;
+    V_FLOAT vn1 = delx*vnnr * rsqinv;
+    V_FLOAT vn2 = dely*vnnr * rsqinv;
+    V_FLOAT vn3 = delz*vnnr * rsqinv;
+      
+    // tangential component
+      
+    V_FLOAT vt1 = vr1 - vn1;
+    V_FLOAT vt2 = vr2 - vn2;
+    V_FLOAT vt3 = vr3 - vn3;
+
+    // relative rotational velocity
+
+    V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv;
+    V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv;
+    V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv;
+
+    LMP_FLOAT meff = imass*jmass / (imass+jmass);
+    if (mask[i] & freeze_group_bit) meff = jmass;
+    if (mask[j] & freeze_group_bit) meff = imass;
+
+    F_FLOAT damp = meff*gamman*vnnr*rsqinv;
+    F_FLOAT ccel = kn*(radsum-r)*rinv - damp;
+
+    // relative velocities
+
+    V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3);
+    V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1);
+    V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2);
+    V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+    vrel = sqrt(vrel);
+
+    // shear history effects
+
+    X_FLOAT shear1 = d_firstshear(i,3*m);
+    X_FLOAT shear2 = d_firstshear(i,3*m+1);
+    X_FLOAT shear3 = d_firstshear(i,3*m+2);
+    if (SHEARUPDATE) {
+      shear1 += vtr1*dt;
+      shear2 += vtr2*dt;
+      shear3 += vtr3*dt;
+    }
+    X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 +
+			  shear3*shear3);
+
+    // rotate shear displacements
+
+    X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz;
+    rsht *= rsqinv;
+    if (SHEARUPDATE) {
+      shear1 -= rsht*delx;
+      shear2 -= rsht*dely;
+      shear3 -= rsht*delz;
+    }
+
+    // tangential forces = shear + tangential velocity damping
+
+    F_FLOAT fs1 = - (kt*shear1 + meff*gammat*vtr1);
+    F_FLOAT fs2 = - (kt*shear2 + meff*gammat*vtr2);
+    F_FLOAT fs3 = - (kt*shear3 + meff*gammat*vtr3);
+
+    // rescale frictional displacements and forces if needed
+
+    F_FLOAT fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+    F_FLOAT fn = xmu * fabs(ccel*r);
+
+    if (fs > fn) {
+      if (shrmag != 0.0) {
+	shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) -
+	  meff*gammat*vtr1/kt;
+	shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) -
+	  meff*gammat*vtr2/kt;
+	shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) -
+	  meff*gammat*vtr3/kt;
+	fs1 *= fn/fs;
+	fs2 *= fn/fs;
+	fs3 *= fn/fs;
+      } else fs1 = fs2 = fs3 = 0.0;
+    }
+
+    if (SHEARUPDATE) {
+      d_firstshear(i,3*m) = shear1;
+      d_firstshear(i,3*m+1) = shear2;
+      d_firstshear(i,3*m+2) = shear3;
+    }
+
+    // forces & torques
+
+    F_FLOAT fx = delx*ccel + fs1;
+    F_FLOAT fy = dely*ccel + fs2;
+    F_FLOAT fz = delz*ccel + fs3;
+    fx_i += fx;
+    fy_i += fy;
+    fz_i += fz;
+
+    F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2);
+    F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3);
+    F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1);
+    torquex_i -= irad*tor1;
+    torquey_i -= irad*tor2;
+    torquez_i -= irad*tor3;
+      
+    if (NEWTON_PAIR || j < nlocal) {
+      a_f(j,0) -= fx;
+      a_f(j,1) -= fy;
+      a_f(j,2) -= fz;
+      a_torque(j,0) -= jrad*tor1;
+      a_torque(j,1) -= jrad*tor2;
+      a_torque(j,2) -= jrad*tor3;
+    }
+
+    if (EVFLAG == 2)
+      ev_tally_xyz_atom<NEIGHFLAG, NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz);
+    if (EVFLAG == 1)
+      ev_tally_xyz<NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz);
+  }
+
+  a_f(i,0) += fx_i;
+  a_f(i,1) += fy_i;
+  a_f(i,2) += fz_i;
+  a_torque(i,0) += torquex_i;
+  a_torque(i,1) += torquey_i;
+  a_torque(i,2) += torquez_i;
+}
+
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
+KOKKOS_INLINE_FUNCTION
+void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int ii) const {
+  EV_FLOAT ev;
+  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>(), ii, ev);
+}
+
+template<class DeviceType>
+template<int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairGranHookeHistoryKokkos<DeviceType>::ev_tally_xyz(EV_FLOAT &ev, int i, int j,
+							  F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+							  X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const
+{
+  F_FLOAT v[6];
+
+  v[0] = delx*fx;
+  v[1] = dely*fy;
+  v[2] = delz*fz;
+  v[3] = delx*fy;
+  v[4] = delx*fz;
+  v[5] = dely*fz;
+
+  if (NEWTON_PAIR) {
+    ev.v[0] += v[0];
+    ev.v[1] += v[1];
+    ev.v[2] += v[2];
+    ev.v[3] += v[3];
+    ev.v[4] += v[4];
+    ev.v[5] += v[5];
+  } else {
+    if (i < nlocal) {
+      ev.v[0] += 0.5*v[0];
+      ev.v[1] += 0.5*v[1];
+      ev.v[2] += 0.5*v[2];
+      ev.v[3] += 0.5*v[3];
+      ev.v[4] += 0.5*v[4];
+      ev.v[5] += 0.5*v[5];
+    }
+    if (j < nlocal) {
+      ev.v[0] += 0.5*v[0];
+      ev.v[1] += 0.5*v[1];
+      ev.v[2] += 0.5*v[2];
+      ev.v[3] += 0.5*v[3];
+      ev.v[4] += 0.5*v[4];
+      ev.v[5] += 0.5*v[5];
+    }
+  }
+}
+
+template<class DeviceType>
+template<int NEIGHFLAG, int NEWTON_PAIR>
+KOKKOS_INLINE_FUNCTION
+void PairGranHookeHistoryKokkos<DeviceType>::ev_tally_xyz_atom(EV_FLOAT &ev, int i, int j,
+							       F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+							       X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const
+{
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();  
+
+  F_FLOAT v[6];
+
+  v[0] = delx*fx;
+  v[1] = dely*fy;
+  v[2] = delz*fz;
+  v[3] = delx*fy;
+  v[4] = delx*fz;
+  v[5] = dely*fz;
+
+  if (NEWTON_PAIR || i < nlocal) {
+    v_vatom(i,0) += 0.5*v[0];
+    v_vatom(i,1) += 0.5*v[1];
+    v_vatom(i,2) += 0.5*v[2];
+    v_vatom(i,3) += 0.5*v[3];
+    v_vatom(i,4) += 0.5*v[4];
+    v_vatom(i,5) += 0.5*v[5];
+  }
+  if (NEWTON_PAIR || j < nlocal) {
+    v_vatom(j,0) += 0.5*v[0];
+    v_vatom(j,1) += 0.5*v[1];
+    v_vatom(j,2) += 0.5*v[2];
+    v_vatom(j,3) += 0.5*v[3];
+    v_vatom(j,4) += 0.5*v[4];
+    v_vatom(j,5) += 0.5*v[5];
+  }
+}
+
+namespace LAMMPS_NS {
+template class PairGranHookeHistoryKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class PairGranHookeHistoryKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..822b9203a46ee5945c01eec2e353ca67be42385b
--- /dev/null
+++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h
@@ -0,0 +1,150 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gran/hooke/history/kk,PairGranHookeHistoryKokkos<LMPDeviceType>)
+PairStyle(gran/hooke/history/kk/device,PairGranHookeHistoryKokkos<LMPDeviceType>)
+PairStyle(gran/hooke/history/kk/host,PairGranHookeHistoryKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H
+
+#include "pair_gran_hooke_history.h"
+#include "pair_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template <class DeviceType>
+class FixNeighHistoryKokkos;
+  
+template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
+struct TagPairGranHookeHistoryCompute {};
+
+struct TagPairGranHookeHistoryReduce {};
+
+template <class DeviceType>
+class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
+ public:
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+
+  PairGranHookeHistoryKokkos(class LAMMPS *);
+  virtual ~PairGranHookeHistoryKokkos();
+  virtual void compute(int, int);
+  void init_style();
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairGranHookeHistoryReduce, const int ii) const;
+
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const;
+  template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int) const;
+
+  template<int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally_xyz(EV_FLOAT &ev, int i, int j,
+		    F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+		    X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const;
+  template<int NEIGHFLAG, int NEWTON_PAIR>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally_xyz_atom(EV_FLOAT &ev, int i, int j,
+			 F_FLOAT fx, F_FLOAT fy, F_FLOAT fz,
+			 X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const;
+    
+ protected:
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_v_array_randomread v;
+  typename AT::t_v_array_randomread omega;
+  typename AT::t_f_array f;
+  typename AT::t_f_array torque;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_int_1d_randomread mask;
+  typename AT::t_float_1d_randomread rmass;
+  typename AT::t_float_1d_randomread radius;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+  typename AT::t_tagint_1d tag;
+
+  typename AT::t_neighbors_2d d_neighbors;
+  typename AT::t_int_1d_randomread d_ilist;
+  typename AT::t_int_1d_randomread d_numneigh;
+
+  typename Kokkos::View<int**> d_firsttouch;
+  typename Kokkos::View<LMP_FLOAT**> d_firstshear;
+
+  typename AT::t_neighbors_2d d_neighbors_touch;
+  typename AT::t_int_1d d_numneigh_touch;
+  
+  int newton_pair;
+  double special_lj[4];
+
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  FixNeighHistoryKokkos<DeviceType> *fix_historyKK;
+
+  friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Incorrect args for pair coefficients
+
+Self-explanatory.  Check the input script or data file.
+
+E: Pair granular requires atom attributes radius, rmass
+
+The atom style defined does not have these attributes.
+
+E: Pair granular requires ghost atoms store velocity
+
+Use the comm_modify vel yes command to enable this.
+
+E: Could not find pair fix neigh history ID
+
+UNDOCUMENTED
+
+U: Pair granular with shear history requires newton pair off
+
+This is a current restriction of the implementation of pair
+granular styles with history.
+
+U: Could not find pair fix ID
+
+A fix is created internally by the pair style to store shear
+history information.  You cannot delete it.
+
+*/
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 136e543a2cd708501004d2f94290b21454dabe0a..f56e6015d014018550e636dd1c8dfcb414c7e8e5 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -55,6 +55,20 @@ struct ForceAdder {
 
 /* ---------------------------------------------------------------------- */
 
+template<class View>
+struct Zero {
+  View v;
+  Zero(const View &v_):v(v_) {}
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int &i) const {
+    v(i,0) = 0;
+    v(i,1) = 0;
+    v(i,2) = 0;
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
 VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
   Verlet(lmp, narg, arg)
 {
@@ -120,6 +134,7 @@ void VerletKokkos::setup(int flag)
   atomKK->modified(Host,ALL_MASK);
 
   neighbor->build(1);
+  modify->setup_post_neighbor();
   neighbor->ncalls = 0;
 
   // compute all forces
@@ -175,7 +190,7 @@ void VerletKokkos::setup(int flag)
   modify->setup(vflag);
   output->setup(flag);
   lmp->kokkos->auto_sync = 1;
-  update->setupflag = 1;
+  update->setupflag = 0;
 }
 
 /* ----------------------------------------------------------------------
@@ -223,6 +238,7 @@ void VerletKokkos::setup_minimal(int flag)
     atomKK->modified(Host,ALL_MASK);
 
     neighbor->build(1);
+    modify->setup_post_neighbor();
     neighbor->ncalls = 0;
   }
 
@@ -567,32 +583,25 @@ void VerletKokkos::run(int n)
 
 void VerletKokkos::force_clear()
 {
-  int i;
-
   if (external_force_clear) return;
 
+  atomKK->k_f.modified_host() = 0; // ignore host forces/torques since device views
+  atomKK->k_torque.modified_host() = 0; //   will be cleared below
+
   // clear force on all particles
   // if either newton flag is set, also include ghosts
   // when using threads always clear all forces.
 
   if (neighbor->includegroup == 0) {
-    int nall;
-    if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
-    else nall = atomKK->nlocal;
-
-    size_t nbytes = sizeof(double) * nall;
+    int nall = atomKK->nlocal;
+    if (force->newton) nall += atomKK->nghost;
 
-    if (nbytes) {
-      if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
-        memset_kokkos(atomKK->k_f.view<LMPHostType>());
-        atomKK->modified(Host,F_MASK);
-        atomKK->sync(Device,F_MASK);
-      } else {
-        memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
-        atomKK->modified(Device,F_MASK);
-      }
-      if (torqueflag)  memset(&(atomKK->torque[0][0]),0,3*nbytes);
+    Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
+    atomKK->modified(Device,F_MASK);
 
+    if (torqueflag) {
+      Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
+      atomKK->modified(Device,TORQUE_MASK);
     }
 
   // neighbor includegroup flag is set
@@ -600,35 +609,23 @@ void VerletKokkos::force_clear()
   // if either newton flag is set, also include ghosts
 
   } else {
-    int nall = atomKK->nfirst;
-    if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
-      memset_kokkos(atomKK->k_f.view<LMPHostType>());
-      atomKK->modified(Host,F_MASK);
-    } else {
-      memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
-      atomKK->modified(Device,F_MASK);
-    }
+    Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
+    atomKK->modified(Device,F_MASK);
+
     if (torqueflag) {
-      double **torque = atomKK->torque;
-      for (i = 0; i < nall; i++) {
-        torque[i][0] = 0.0;
-        torque[i][1] = 0.0;
-        torque[i][2] = 0.0;
-      }
+      Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
+      atomKK->modified(Device,TORQUE_MASK);
     }
 
     if (force->newton) {
-      nall = atomKK->nlocal + atomKK->nghost;
+      auto range = Kokkos::RangePolicy<LMPDeviceType>(atomKK->nlocal, atomKK->nlocal + atomKK->nghost);
+      Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
+      atomKK->modified(Device,F_MASK);
 
       if (torqueflag) {
-        double **torque = atomKK->torque;
-        for (i = atomKK->nlocal; i < nall; i++) {
-          torque[i][0] = 0.0;
-          torque[i][1] = 0.0;
-          torque[i][2] = 0.0;
-        }
+	Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
+	atomKK->modified(Device,TORQUE_MASK);
       }
-
     }
   }
 }
diff --git a/src/USER-VTK/dump_vtk.cpp b/src/USER-VTK/dump_vtk.cpp
index 5f2e3d62ec433e4cd77e80852aa1e3f5e88ad913..edbc647a81857c954ac4ed902d98de4063975b84 100644
--- a/src/USER-VTK/dump_vtk.cpp
+++ b/src/USER-VTK/dump_vtk.cpp
@@ -93,10 +93,10 @@ enum{VTK,VTP,VTU,PVTP,PVTU}; // file formats
 #define ONEFIELD 32
 #define DELTA 1048576
 
-#if VTK_MAJOR_VERSION == 7
+#if VTK_MAJOR_VERSION >= 7
 #define InsertNextTupleValue InsertNextTypedTuple
-#elif VTK_MAJOR_VERSION > 7
-#error This code has only been tested with VTK 5, 6, and 7
+#elif VTK_MAJOR_VERSION > 8
+#error This code has only been tested with VTK 5, 6, 7, and 8
 #endif
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp
index 4015240af890523eb23ef7acfd1143a72880f87c..e96b0f38e3ac4576eaabab03feef88cc4c831c24 100644
--- a/src/comm_brick.cpp
+++ b/src/comm_brick.cpp
@@ -611,7 +611,7 @@ void CommBrick::exchange()
   maxexchange = maxexchange_atom + maxexchange_fix;
   bufextra = maxexchange + BUFEXTRA;
   if (bufextra > bufextra_old)
-    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
+    grow_send(maxsend+bufextra,1);
 
   // subbox bounds for orthogonal or triclinic
 
diff --git a/src/fix_gravity.cpp b/src/fix_gravity.cpp
index c3e73d86c935dfbe497f9285004ea588162d1675..bc10eb280782bf83023b24a1202336e4b3e5ddf8 100644
--- a/src/fix_gravity.cpp
+++ b/src/fix_gravity.cpp
@@ -141,6 +141,8 @@ FixGravity::FixGravity(LAMMPS *lmp, int narg, char **arg) :
 
 FixGravity::~FixGravity()
 {
+  if (copymode) return;
+
   delete [] mstr;
   delete [] vstr;
   delete [] pstr;
diff --git a/src/fix_gravity.h b/src/fix_gravity.h
index 35be23a40a12e37d35224f792426f34c6eafc538..1f18ee274be4fe57225112602b44c6033e1f1223 100644
--- a/src/fix_gravity.h
+++ b/src/fix_gravity.h
@@ -29,7 +29,7 @@ class FixGravity : public Fix {
 
  public:
   FixGravity(class LAMMPS *, int, char **);
-  ~FixGravity();
+  virtual ~FixGravity();
   int setmask();
   void init();
   void setup(int);
diff --git a/src/fix_neigh_history.cpp b/src/fix_neigh_history.cpp
index 9661409a6a1cbde20beacd3e3f1ff63605273b7b..c21b494aa41d0d515535afe9c64e7ef9c37f4273 100644
--- a/src/fix_neigh_history.cpp
+++ b/src/fix_neigh_history.cpp
@@ -95,6 +95,8 @@ FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) :
 
 FixNeighHistory::~FixNeighHistory()
 {
+  if (copymode) return;
+
   // unregister this fix so atom class doesn't invoke it any more
 
   atom->delete_callback(id,0);