diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 9378eccfc7c8f35b042d9816b68221e84f7bc935..8f6e21bf8b464b5f2863d1ad25f7f8f6ed09a327 100644
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -40,12 +40,12 @@ if (test $1 = 1) then
   if (test -e ../Makefile.package) then
     sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
     sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
-    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
-    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package
+#    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
     sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package
+#    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
   fi
 
   if (test -e ../Makefile.package.settings) then
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index 14dbd7e10e9635f3b4342523c25e360f7a298e36..89c7eea007d7a3f09fdde7d161c96869f55feb72 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -37,31 +37,43 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
 
 AtomKokkos::~AtomKokkos()
 {
-  k_tag = DAT::tdual_tagint_1d();
-  k_mask = DAT::tdual_int_1d();
-  k_type = DAT::tdual_int_1d();
-  k_image = DAT::tdual_imageint_1d();
-  k_molecule = DAT::tdual_tagint_1d();
-
-  k_x = DAT::tdual_x_array();
-  k_v = DAT::tdual_v_array();
-  k_f = DAT::tdual_f_array();
-
-  k_mass = DAT::tdual_float_1d();
-
-  tag = NULL;
-  mask = NULL;
-  type = NULL;
-  image = NULL;
-  molecule = NULL;
-  mass = NULL;
-
-  memory->sfree(x);
-  memory->sfree(v);
-  memory->sfree(f);
-  x = NULL;
-  v = NULL;
-  f = NULL;
+  memory->destroy_kokkos(k_tag, tag);  // every call passes a dual view plus its matching pointer
+  memory->destroy_kokkos(k_mask, mask);
+  memory->destroy_kokkos(k_type, type);
+  memory->destroy_kokkos(k_image, image);
+  memory->destroy_kokkos(k_molecule, molecule);
+  // x, v, f now go through the same call instead of memory->sfree()
+  memory->destroy_kokkos(k_x, x);
+  memory->destroy_kokkos(k_v, v);
+  memory->destroy_kokkos(k_f, f);
+
+  memory->destroy_kokkos(k_mass, mass);
+
+  memory->destroy_kokkos(k_q,q);
+  // special-neighbor, bond, angle, dihedral and improper arrays
+  memory->destroy_kokkos(k_nspecial, nspecial);
+  memory->destroy_kokkos(k_special, special);
+  memory->destroy_kokkos(k_num_bond, num_bond);
+  memory->destroy_kokkos(k_bond_type, bond_type);
+  memory->destroy_kokkos(k_bond_atom, bond_atom);
+  memory->destroy_kokkos(k_num_angle, num_angle);
+  memory->destroy_kokkos(k_angle_type, angle_type);
+  memory->destroy_kokkos(k_angle_atom1, angle_atom1);
+  memory->destroy_kokkos(k_angle_atom2, angle_atom2);
+  memory->destroy_kokkos(k_angle_atom3, angle_atom3);
+  memory->destroy_kokkos(k_num_dihedral, num_dihedral);
+  memory->destroy_kokkos(k_dihedral_type, dihedral_type);
+  memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
+  memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
+  memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
+  memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
+  memory->destroy_kokkos(k_num_improper, num_improper);
+  memory->destroy_kokkos(k_improper_type, improper_type);
+  memory->destroy_kokkos(k_improper_atom1, improper_atom1);
+  memory->destroy_kokkos(k_improper_atom2, improper_atom2);
+  memory->destroy_kokkos(k_improper_atom3, improper_atom3);
+  memory->destroy_kokkos(k_improper_atom4, improper_atom4);
+
 }
 
 /* ---------------------------------------------------------------------- */
@@ -96,9 +108,6 @@ void AtomKokkos::sort()
 {
   int i,m,n,ix,iy,iz,ibin,empty;
 
-  sync(Host,ALL_MASK);
-  modified(Host,ALL_MASK);
-
   // set next timestep for sorting to take place
 
   nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
@@ -122,6 +131,9 @@ void AtomKokkos::sort()
 
   if (nlocal == nmax) avec->grow(0);
 
+  sync(Host,ALL_MASK);
+  modified(Host,ALL_MASK);
+
   // bin atoms in reverse order so linked list will be in forward order
 
   for (i = 0; i < nbins; i++) binhead[i] = -1;
@@ -188,3 +200,43 @@ void AtomKokkos::sort()
   //MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
   //if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
 }
+
+/* ----------------------------------------------------------------------
+   reallocate the per-atom arrays selected by the bits set in mask
+   only SPECIAL_MASK is acted on here; other bits are ignored
+------------------------------------------------------------------------- */
+void AtomKokkos::grow(unsigned int mask){
+  // bitwise test: a logical && would be true for every non-zero mask
+  if (mask & SPECIAL_MASK){
+    memory->destroy_kokkos(k_special, special);
+    sync(Device, mask);
+    modified(Device, mask);
+    memory->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special");
+    avec->grow_reset();  // let the atom style re-read its array pointers
+    sync(Host, mask);
+  }
+}
+
+void AtomKokkos::deallocate_topology()
+{  // destroy_kokkos() every bond/angle/dihedral/improper type+atom array; num_* arrays are not touched
+  memory->destroy_kokkos(k_bond_type, bond_type);
+  memory->destroy_kokkos(k_bond_atom, bond_atom);
+
+  memory->destroy_kokkos(k_angle_type, angle_type);
+  memory->destroy_kokkos(k_angle_atom1, angle_atom1);
+  memory->destroy_kokkos(k_angle_atom2, angle_atom2);
+  memory->destroy_kokkos(k_angle_atom3, angle_atom3);
+
+  memory->destroy_kokkos(k_dihedral_type, dihedral_type);
+  memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
+  memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
+  memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
+  memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
+
+  memory->destroy_kokkos(k_improper_type, improper_type);
+  memory->destroy_kokkos(k_improper_atom1, improper_atom1);
+  memory->destroy_kokkos(k_improper_atom2, improper_atom2);
+  memory->destroy_kokkos(k_improper_atom3, improper_atom3);
+  memory->destroy_kokkos(k_improper_atom4, improper_atom4);
+}
+
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index 6d75f7edd4b774ec68a78466924739fcccbdceb1..2abf288793fd1834edd6c93808ab5251cd0bcb0f 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -21,15 +21,33 @@ namespace LAMMPS_NS {
 
 class AtomKokkos : public Atom {
  public:
-  DAT::tdual_tagint_1d k_tag, k_molecule;
-  DAT::tdual_imageint_1d k_image;
+  DAT::tdual_tagint_1d k_tag;
   DAT::tdual_int_1d k_type, k_mask;
+  DAT::tdual_imageint_1d k_image;
   DAT::tdual_x_array k_x;
   DAT::tdual_v_array k_v;
   DAT::tdual_f_array k_f;
 
+
   DAT::tdual_float_1d k_mass;
 
+  DAT::tdual_float_1d k_q;
+  DAT::tdual_tagint_1d k_molecule;
+  DAT::tdual_int_2d k_nspecial;
+  DAT::tdual_tagint_2d k_special;
+  DAT::tdual_int_1d k_num_bond;
+  DAT::tdual_int_2d k_bond_type;
+  DAT::tdual_tagint_2d k_bond_atom;
+  DAT::tdual_int_1d k_num_angle;
+  DAT::tdual_int_2d k_angle_type;
+  DAT::tdual_tagint_2d k_angle_atom1, k_angle_atom2, k_angle_atom3;
+  DAT::tdual_int_1d k_num_dihedral;
+  DAT::tdual_int_2d k_dihedral_type;
+  DAT::tdual_tagint_2d k_dihedral_atom1, k_dihedral_atom2, k_dihedral_atom3, k_dihedral_atom4;
+  DAT::tdual_int_1d k_num_improper;
+  DAT::tdual_int_2d k_improper_type;
+  DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4;
+
   AtomKokkos(class LAMMPS *);
   ~AtomKokkos();
 
@@ -37,6 +55,8 @@ class AtomKokkos : public Atom {
   void sync(const ExecutionSpace space, unsigned int mask);
   void modified(const ExecutionSpace space, unsigned int mask);
   virtual void sort();
+  virtual void grow(unsigned int mask);
+  virtual void deallocate_topology();
 };
 
 template<class ViewType, class IndexView>
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..86ef5509a5dbea09ca3a24ccd08384175da1adc0
--- /dev/null
+++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp
@@ -0,0 +1,1886 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_angle_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  molecular = 1;
+  bonds_allow = angles_allow = 1;
+  mass_type = 1;
+  // value counts per atom; they match what the pack routines of this style write
+  comm_x_only = comm_f_only = 1;
+  size_forward = 3;   // x,y,z
+  size_reverse = 3;   // fx,fy,fz
+  size_border = 7;    // x,y,z,tag,type,mask,molecule
+  size_velocity = 3;
+  size_data_atom = 6;
+  size_data_vel = 4;
+  xcol_data = 4;
+
+  atom->molecule_flag = 1;
+
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+  atomKK = (AtomKokkos *) atom;  // unchecked casts: assumes atom/comm are the Kokkos classes — TODO confirm
+  commKK = (CommKokkos *) comm;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays: n = 0 raises nmax by DELTA, any other n sets nmax = n
+   aborts via error->one() if nmax is negative or exceeds MAXSMALLINT
+   afterwards calls grow_reset() and grows the arrays of extra-grow fixes
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;
+  else nmax = n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+  // second dimension of the 2d topology arrays comes from atomKK (maxspecial, *_per_atom)
+  memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule");
+  memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial");
+  memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,
+		      "atom:special");
+  memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond");
+  memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,
+		      "atom:bond_type");
+  memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,
+		      "atom:bond_atom");
+
+  memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle");
+  memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom,
+		      "atom:angle_type");
+  memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom1");
+  memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom2");
+  memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom3");
+  // re-read this class's cached pointers and views after the grow_kokkos calls
+  grow_reset();
+  sync(Host,ALL_MASK);
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   re-point the cached raw pointers and device/host views at atomKK's arrays
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::grow_reset()
+{
+  tag = atomKK->tag;
+  d_tag = atomKK->k_tag.d_view;
+  h_tag = atomKK->k_tag.h_view;
+
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+
+  molecule = atomKK->molecule;
+  d_molecule = atomKK->k_molecule.d_view;
+  h_molecule = atomKK->k_molecule.h_view;
+  nspecial = atomKK->nspecial;
+  d_nspecial = atomKK->k_nspecial.d_view;
+  h_nspecial = atomKK->k_nspecial.h_view;
+  special = atomKK->special;
+  d_special = atomKK->k_special.d_view;
+  h_special = atomKK->k_special.h_view;
+  num_bond = atomKK->num_bond;
+  d_num_bond = atomKK->k_num_bond.d_view;
+  h_num_bond = atomKK->k_num_bond.h_view;
+  bond_type = atomKK->bond_type;
+  d_bond_type = atomKK->k_bond_type.d_view;
+  h_bond_type = atomKK->k_bond_type.h_view;
+  bond_atom = atomKK->bond_atom;
+  d_bond_atom = atomKK->k_bond_atom.d_view;
+  h_bond_atom = atomKK->k_bond_atom.h_view;
+
+  num_angle = atomKK->num_angle;
+  d_num_angle = atomKK->k_num_angle.d_view;
+  h_num_angle = atomKK->k_num_angle.h_view;
+  angle_type = atomKK->angle_type;
+  d_angle_type = atomKK->k_angle_type.d_view;
+  h_angle_type = atomKK->k_angle_type.h_view;
+  angle_atom1 = atomKK->angle_atom1;
+  d_angle_atom1 = atomKK->k_angle_atom1.d_view;
+  h_angle_atom1 = atomKK->k_angle_atom1.h_view;
+  angle_atom2 = atomKK->angle_atom2;
+  d_angle_atom2 = atomKK->k_angle_atom2.d_view;
+  h_angle_atom2 = atomKK->k_angle_atom2.h_view;
+  angle_atom3 = atomKK->angle_atom3;
+  d_angle_atom3 = atomKK->k_angle_atom3.d_view;
+  h_angle_atom3 = atomKK->k_angle_atom3.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J on the host side (forces are not copied)
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::copy(int i, int j, int delflag)
+{
+  int k;
+
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];  // NOTE(review): raw pointer here, host views elsewhere — presumably the same storage
+  h_image[j] = h_image[i];
+  h_x(j,0) = h_x(i,0);
+  h_x(j,1) = h_x(i,1);
+  h_x(j,2) = h_x(i,2);
+  h_v(j,0) = h_v(i,0);
+  h_v(j,1) = h_v(i,1);
+  h_v(j,2) = h_v(i,2);
+
+  h_molecule(j) = h_molecule(i);
+  // only the first num_bond entries of each bond row are copied
+  h_num_bond(j) = h_num_bond(i);
+  for (k = 0; k < h_num_bond(j); k++) {
+    h_bond_type(j,k) = h_bond_type(i,k);
+    h_bond_atom(j,k) = h_bond_atom(i,k);
+  }
+  // nspecial(j,2) is used as the number of special entries to copy
+  h_nspecial(j,0) = h_nspecial(i,0);
+  h_nspecial(j,1) = h_nspecial(i,1);
+  h_nspecial(j,2) = h_nspecial(i,2);
+  for (k = 0; k < h_nspecial(j,2); k++)
+    h_special(j,k) = h_special(i,k);
+
+  h_num_angle(j) = h_num_angle(i);
+  for (k = 0; k < h_num_angle(j); k++) {
+    h_angle_type(j,k) = h_angle_type(i,k);
+    h_angle_atom1(j,k) = h_angle_atom1(i,k);
+    h_angle_atom2(j,k) = h_angle_atom2(i,k);
+    h_angle_atom3(j,k) = h_angle_atom3(i,k);
+  }
+  // forward the copy to every fix listed in atom->extra_grow
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecAngleKokkos_PackComm {
+  typedef DeviceType device_type;
+  // functor: for send-list slot i, writes atom j's (optionally shifted) x into row i of _buf
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  // the six pbc[] entries are copied into _pbc; _buf is set up through buffer_view()
+  AtomVecAngleKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()
+				*buf.view<DeviceType>().dimension_1())/3;
+        const size_t elements = 3;  // three coordinates per atom
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // template constants select exactly one of the three variants
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
+					 const DAT::tdual_int_2d &list,
+					 const int & iswap,
+					 const DAT::tdual_xfloat_2d &buf,
+					 const int &pbc_flag,
+					 const int* const pbc)
+{
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecAngleKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecAngleKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);  // same four kernel variants as above, instantiated for LMPDeviceType
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+
+	return n*size_forward;  // count of values packed: size_forward per atom
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecAngleKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+  // functor: copies atom j's (optionally shifted) x into row i+_nfirst of the same x array
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  // _x (read) and _xw (write) are both built from the same dual view x
+  AtomVecAngleKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // template constants select exactly one of the three variants
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+				       const int & iswap,
+				       const int nfirst, const int &pbc_flag,
+				       const int* const pbc) {
+  if(commKK->forward_comm_on_host) {  // no buffer: the functor writes x rows nfirst.. directly
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,0>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,0>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);  // same four kernel variants as above, instantiated for LMPDeviceType
+    modified(Device,X_MASK);
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,0>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,0>
+	f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+	return n*3;  // three coordinates per copied atom
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecAngleKokkos_UnpackComm {
+  typedef DeviceType device_type;
+  // functor: copies row i of _buf into row i+_first of _x
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecAngleKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {  // copies buf rows 0..n-1 into x rows first..first+n-1
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();  // fence the space the kernel ran in, as the pack routines do
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_comm(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  // packs the host positions of the n listed atoms, three doubles per atom,
+  // adding the periodic offset when pbc_flag is set; returns doubles written
+  double *dst = buf;
+  if (pbc_flag == 0) {
+    for (int k = 0; k < n; ++k) {
+      const int iatom = list[k];
+      *dst++ = h_x(iatom,0);
+      *dst++ = h_x(iatom,1);
+      *dst++ = h_x(iatom,2);
+    }
+    return static_cast<int>(dst - buf);
+  }
+
+  // offset per dimension; the triclinic case adds the xy/xz/yz terms
+  double shift[3];
+  if (domain->triclinic == 0) {
+    shift[0] = pbc[0]*domain->xprd;
+    shift[1] = pbc[1]*domain->yprd;
+    shift[2] = pbc[2]*domain->zprd;
+  } else {
+    shift[0] = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    shift[1] = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+    shift[2] = pbc[2]*domain->zprd;
+  }
+  for (int k = 0; k < n; ++k) {
+    const int iatom = list[k];
+    for (int d = 0; d < 3; ++d) *dst++ = h_x(iatom,d) + shift[d];
+  }
+  return static_cast<int>(dst - buf);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                     int pbc_flag, int *pbc)
+{
+  // packs position (shifted by the periodic offset when pbc_flag != 0) and
+  // velocity of each listed atom, six doubles per atom; returns doubles packed
+  int i,j,m = 0;
+  double dx,dy,dz,dvx,dvy,dvz;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (mask[j] & deform_groupbit) {  // test the atom being packed (j), not send-list slot i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::unpack_comm(int n, int first, double *buf)
+{
+  // unpack n received positions (x,y,z each) into h_x rows first..first+n-1
+  const double *src = buf;
+  const int stop = first + n;
+
+  for (int iatom = first; iatom < stop; ++iatom) {
+    h_x(iatom,0) = *src++;
+    h_x(iatom,1) = *src++;
+    h_x(iatom,2) = *src++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  // per atom the buffer holds x,y,z followed by vx,vy,vz; rows first..first+n-1 are filled
+  const double *src = buf;
+  const int stop = first + n;
+
+  for (int iatom = first; iatom < stop; ++iatom) {
+    h_x(iatom,0) = *src++;
+    h_x(iatom,1) = *src++;
+    h_x(iatom,2) = *src++;
+    h_v(iatom,0) = *src++;
+    h_v(iatom,1) = *src++;
+    h_v(iatom,2) = *src++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
+{
+  // sync(Host,F_MASK) is skipped when there is nothing to pack
+  if (n > 0) sync(Host,F_MASK);
+
+  double *dst = buf;
+  for (int iatom = first; iatom < first + n; ++iatom) {
+    *dst++ = h_f(iatom,0);
+    *dst++ = h_f(iatom,1);
+    *dst++ = h_f(iatom,2);
+  }
+  // number of doubles written: three force components per atom
+  return static_cast<int>(dst - buf);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // modified(Host,F_MASK) is skipped when there is nothing to accumulate
+  if (n > 0) modified(Host,F_MASK);
+
+  const double *src = buf;
+  for (int k = 0; k < n; ++k, src += 3) {
+    const int iatom = list[k];
+    h_f(iatom,0) += src[0];
+    h_f(iatom,1) += src[1];
+    h_f(iatom,2) += src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecAngleKokkos_PackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // functor: writes seven values per send-list slot i into row i of _buf
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const int _iswap;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;
+  // dx,dy,dz are the precomputed position offsets, added only when PBC_FLAG != 0
+  AtomVecAngleKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),_iswap(iswap),
+      _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+          _buf(i,3) = _tag(j);  // tag/type/mask/molecule are assigned into the same buffer as x
+          _buf(i,4) = _type(j);
+          _buf(i,5) = _mask(j);
+          _buf(i,6) = _molecule(j);
+      } else {
+          _buf(i,0) = _x(j,0) + _dx;
+          _buf(i,1) = _x(j,1) + _dy;
+          _buf(i,2) = _x(j,2) + _dz;
+          _buf(i,3) = _tag(j);
+          _buf(i,4) = _type(j);
+          _buf(i,5) = _mask(j);
+          _buf(i,6) = _molecule(j);
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* pack n border atoms of swap iswap into buf with a parallel kernel
+   run on the requested execution space; returns n*size_border */
+
+int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                          DAT::tdual_xfloat_2d buf,int iswap,
+                                          int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  // no periodic shift: use the PBC_FLAG = 0 functor with a zero offset
+  if (pbc_flag == 0) {
+    dx = dy = dz = 0;
+    if (space != Host) {
+      AtomVecAngleKokkos_PackBorder<LMPDeviceType,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPDeviceType::fence();
+    } else {
+      AtomVecAngleKokkos_PackBorder<LMPHostType,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPHostType::fence();
+    }
+    return n*size_border;
+  }
+
+  // periodic shift: scaled by the box lengths unless the box is triclinic
+  if (domain->triclinic != 0) {
+    dx = pbc[0];
+    dy = pbc[1];
+    dz = pbc[2];
+  } else {
+    dx = pbc[0]*domain->xprd;
+    dy = pbc[1]*domain->yprd;
+    dz = pbc[2]*domain->zprd;
+  }
+
+  if (space != Host) {
+    AtomVecAngleKokkos_PackBorder<LMPDeviceType,1> f(
+      buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+      iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecAngleKokkos_PackBorder<LMPHostType,1> f(
+      buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+      iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack x, tag, type, mask, molecule of the n listed atoms from the host
+   views into buf, then let border fixes append their data
+   returns the number of doubles written */
+
+int AtomVecAngleKokkos::pack_border(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  int m = 0;
+
+  if (pbc_flag != 0) {
+    // shift is scaled by the box lengths unless the box is triclinic
+    double dx,dy,dz;
+    if (domain->triclinic != 0) {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    } else {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (int i = 0; i < n; i++) {
+      const int j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  } else {
+    for (int i = 0; i < n; i++) {
+      const int j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  }
+
+  // data of fixes registered in atom->extra_border follows the atom data
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack x, tag, type, mask, molecule and v of the n listed atoms from the
+   host views into buf, then let border fixes append their data
+   with pbc_flag set, x is shifted by the periodic offset; if additionally
+   deform_vremap is set, v of atoms in the deform group is shifted too
+   returns the number of doubles written */
+
+int AtomVecAngleKokkos::pack_border_vel(int n, int *list, double *buf,
+                                       int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    // shift is scaled by the box lengths unless the box is triclinic
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        // group membership must be tested on the atom being packed (j),
+        // not on its position i in the send list
+        if (h_mask(j) & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  // data of fixes registered in atom->extra_border follows the atom data
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the molecule ID of the n listed atoms, one value per atom
+   returns the number of doubles written */
+
+int AtomVecAngleKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  int i,j,m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    // encode via ubuf so that unpack_border_hybrid(), which decodes with
+    // ubuf(...).i, recovers the same integer
+    buf[m++] = ubuf(h_molecule(j)).d;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* functor that unpacks row i of _buf into atom i+_first:
+   columns 0-2 = x,y,z  3 = tag  4 = type  5 = mask  6 = molecule
+   integer values are expected to be stored by value conversion,
+   as AtomVecAngleKokkos_PackBorder writes them */
+
+template<class DeviceType>
+struct AtomVecAngleKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_tagint_1d _molecule;
+  int _first;
+
+
+  AtomVecAngleKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+    _first(first){
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      // tag and molecule live in tagint views: convert to tagint rather
+      // than int so IDs are not narrowed when tagint is wider than int
+      _tag(i+_first) = static_cast<tagint> (_buf(i,3));
+      _type(i+_first) = static_cast<int>  (_buf(i,4));
+      _mask(i+_first) = static_cast<int>  (_buf(i,5));
+      _molecule(i+_first) = static_cast<tagint> (_buf(i,6));
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack n border atoms from buf into atoms first..first+n-1 with a
+   parallel kernel run on the requested execution space */
+
+void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
+                                             const DAT::tdual_xfloat_2d &buf,
+                                             ExecutionSpace space) {
+  // the unpacked fields are marked as modified both before and after
+  // the per-atom arrays are grown to hold index first+n
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+  while (first+n >= nmax) grow(0);
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+
+  if (space != Host) {
+    AtomVecAngleKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecAngleKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack x, tag, type, mask, molecule for atoms first..first+n-1 from
+   buf into the host views, then hand the rest of buf to border fixes */
+
+void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+
+  for (int i = first; i < last; i++) {
+    // grow the per-atom arrays when the next slot is one past the end
+    if (i == nmax) grow(0);
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+
+    h_x(i,0) = buf[m];
+    h_x(i,1) = buf[m+1];
+    h_x(i,2) = buf[m+2];
+    h_tag(i) = (tagint) ubuf(buf[m+3]).i;
+    h_type(i) = (int) ubuf(buf[m+4]).i;
+    h_mask(i) = (int) ubuf(buf[m+5]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m+6]).i;
+    m += 7;
+  }
+
+  // data of fixes registered in atom->extra_border follows the atom data
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->
+      unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack x, tag, type, mask, molecule and v for atoms first..first+n-1
+   from buf into the host views, then hand the rest of buf to border fixes */
+
+void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    // grow the per-atom arrays when the next slot is one past the end
+    if (i == nmax) grow(0);
+    // mark every field written below as modified on Host, as
+    // unpack_border() does for its fields; V_MASK covers the velocities
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  // data of fixes registered in atom->extra_border follows the atom data
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack one ubuf-encoded molecule ID per atom for atoms
+   first..first+n-1; returns the number of doubles consumed */
+
+int AtomVecAngleKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  int m = 0;
+  for (int i = first; i < first + n; i++) {
+    h_molecule(i) = (tagint) ubuf(buf[m]).i;
+    m++;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that, for each entry mysend of the send list, packs atom
+// i = _sendlist(mysend) into row mysend of _buf and then, if
+// _copylist(mysend) is > -1, overwrites atom i with the data of atom
+// j = _copylist(mysend).
+template<class DeviceType>
+struct AtomVecAngleKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // read-side views of the per-atom data
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_tagint_1d_randomread _molecule;
+  typename AT::t_int_2d_randomread _nspecial;
+  typename AT::t_tagint_2d_randomread _special;
+  typename AT::t_int_1d_randomread _num_bond;
+  typename AT::t_int_2d_randomread _bond_type;
+  typename AT::t_tagint_2d_randomread _bond_atom;
+  typename AT::t_int_1d_randomread _num_angle;
+  typename AT::t_int_2d_randomread _angle_type;
+  typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3;
+  // write-side views ("w" suffix), initialized from the same k_* members
+  // of atom as the read-side views above
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_tagint_1d _moleculew;
+  typename AT::t_int_2d _nspecialw;
+  typename AT::t_tagint_2d _specialw;
+  typename AT::t_int_1d _num_bondw;
+  typename AT::t_int_2d _bond_typew;
+  typename AT::t_tagint_2d _bond_atomw;
+  typename AT::t_int_1d _num_anglew;
+  typename AT::t_int_2d _angle_typew;
+  typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w;
+
+  // exchange buffer, set up by buffer_view() in the constructor
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  // stored from the constructor arguments; not read by operator()
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+  // number of buffer slots reserved per atom; also written to column 0
+  size_t elements;
+
+  AtomVecAngleKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _moleculew(atom->k_molecule.view<DeviceType>()),
+    _nspecialw(atom->k_nspecial.view<DeviceType>()),
+    _specialw(atom->k_special.view<DeviceType>()),
+    _num_bondw(atom->k_num_bond.view<DeviceType>()),
+    _bond_typew(atom->k_bond_type.view<DeviceType>()),
+    _bond_atomw(atom->k_bond_atom.view<DeviceType>()),
+    _num_anglew(atom->k_num_angle.view<DeviceType>()),
+    _angle_typew(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi){
+    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
+    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
+    // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
+    // and angle_atom3
+    // 1 to store buffer length
+    elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
+    // number of whole atoms that fit into the storage of buf
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;
+    // NOTE(review): buffer_view presumably makes _buf a
+    // maxsendlist x elements view onto buf's storage -- confirm
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    int k;
+    const int i = _sendlist(mysend);
+    // column 0 holds the per-atom slot count; data starts at column 1
+    _buf(mysend,0) = elements;
+    int m = 1;
+    _buf(mysend,m++) = _x(i,0);
+    _buf(mysend,m++) = _x(i,1);
+    _buf(mysend,m++) = _x(i,2);
+    _buf(mysend,m++) = _v(i,0);
+    _buf(mysend,m++) = _v(i,1);
+    _buf(mysend,m++) = _v(i,2);
+    _buf(mysend,m++) = _tag(i);
+    _buf(mysend,m++) = _type(i);
+    _buf(mysend,m++) = _mask(i);
+    _buf(mysend,m++) = _image(i);
+    _buf(mysend,m++) = _molecule(i);
+    // only the bonds/angles/specials actually present are written, so
+    // the used part of a row can be shorter than elements
+    _buf(mysend,m++) = _num_bond(i);
+    for (k = 0; k < _num_bond(i); k++) {
+      _buf(mysend,m++) = _bond_type(i,k);
+      _buf(mysend,m++) = _bond_atom(i,k);
+    }
+    _buf(mysend,m++) = _num_angle(i);
+    for (k = 0; k < _num_angle(i); k++) {
+      _buf(mysend,m++) = _angle_type(i,k);
+      _buf(mysend,m++) = _angle_atom1(i,k);
+      _buf(mysend,m++) = _angle_atom2(i,k);
+      _buf(mysend,m++) = _angle_atom3(i,k);
+    }
+    _buf(mysend,m++) = _nspecial(i,0);
+    _buf(mysend,m++) = _nspecial(i,1);
+    _buf(mysend,m++) = _nspecial(i,2);
+    for (k = 0; k < _nspecial(i,2); k++)
+      _buf(mysend,m++) = _special(i,k);
+
+    const int j = _copylist(mysend);
+
+    // j > -1: copy all data of atom j into the slot of the packed atom i
+    if(j>-1) {
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw(i) = _tag(j);
+    _typew(i) = _type(j);
+    _maskw(i) = _mask(j);
+    _imagew(i) = _image(j);
+    _moleculew(i) = _molecule(j);
+    _num_bondw(i) = _num_bond(j);
+    for (k = 0; k < _num_bond(j); k++) {
+      _bond_typew(i,k) = _bond_type(j,k);
+      _bond_atomw(i,k) = _bond_atom(j,k);
+    }
+    _num_anglew(i) = _num_angle(j);
+    for (k = 0; k < _num_angle(j); k++) {
+      _angle_typew(i,k) = _angle_type(j,k);
+      _angle_atom1w(i,k) = _angle_atom1(j,k);
+      _angle_atom2w(i,k) = _angle_atom2(j,k);
+      _angle_atom3w(i,k) = _angle_atom3(j,k);
+    }
+    _nspecialw(i,0) = _nspecial(j,0);
+    _nspecialw(i,1) = _nspecial(j,1);
+    _nspecialw(i,2) = _nspecial(j,2);
+    for (k = 0; k < _nspecial(j,2); k++)
+      _specialw(i,k) = _special(j,k);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* pack nsend atoms into k_buf with a parallel kernel run on the requested
+   execution space, resizing k_buf first if it cannot hold nsend atoms
+   returns nsend times the per-atom slot count */
+
+int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                            DAT::tdual_int_1d k_sendlist,
+                                            DAT::tdual_int_1d k_copylist,
+                                            ExecutionSpace space,int dim,X_FLOAT lo,
+                                            X_FLOAT hi )
+{
+  // slots reserved per atom; same expression as in the pack functor
+  const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
+
+  // number of atoms the buffer can currently hold
+  const int capacity = (int) (k_buf.view<LMPHostType>().dimension_0()*
+                              k_buf.view<LMPHostType>().dimension_1())/elements;
+  if (capacity < nsend) {
+    int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+
+  if (space != Host) {
+    AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    LMPHostType::fence();
+  }
+  return nsend*elements;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack all data of host atom i into buf, followed by the data of fixes
+   registered in atom->extra_grow
+   buf[0] receives the total number of values, which is also returned */
+
+int AtomVecAngleKokkos::pack_exchange(int i, double *buf)
+{
+  int m = 1;      // slot 0 is filled with the total length at the end
+
+  for (int d = 0; d < 3; d++) buf[m++] = h_x(i,d);
+  for (int d = 0; d < 3; d++) buf[m++] = h_v(i,d);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = ubuf(h_molecule(i)).d;
+
+  // bonds: count, then (type, partner) pairs
+  const int nbond = h_num_bond(i);
+  buf[m++] = ubuf(nbond).d;
+  for (int ib = 0; ib < nbond; ib++) {
+    buf[m++] = ubuf(h_bond_type(i,ib)).d;
+    buf[m++] = ubuf(h_bond_atom(i,ib)).d;
+  }
+
+  // angles: count, then (type, atom1, atom2, atom3) quadruples
+  const int nangle = h_num_angle(i);
+  buf[m++] = ubuf(nangle).d;
+  for (int ia = 0; ia < nangle; ia++) {
+    buf[m++] = ubuf(h_angle_type(i,ia)).d;
+    buf[m++] = ubuf(h_angle_atom1(i,ia)).d;
+    buf[m++] = ubuf(h_angle_atom2(i,ia)).d;
+    buf[m++] = ubuf(h_angle_atom3(i,ia)).d;
+  }
+
+  // special neighbors: the 3 counts, then nspecial(i,2) entries
+  buf[m++] = ubuf(h_nspecial(i,0)).d;
+  buf[m++] = ubuf(h_nspecial(i,1)).d;
+  buf[m++] = ubuf(h_nspecial(i,2)).d;
+  const int nspec = h_nspecial(i,2);
+  for (int is = 0; is < nspec; is++)
+    buf[m++] = ubuf(h_special(i,is)).d;
+
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that examines row myrecv of _buf and, if the coordinate stored
+// at column _dim+1 lies in [_lo,_hi), appends that atom to the per-atom
+// views at an index taken from the shared counter _nlocal(0).
+template<class DeviceType>
+struct AtomVecAngleKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // writable views of the per-atom data
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_tagint_1d _molecule;
+  typename AT::t_int_2d _nspecial;
+  typename AT::t_tagint_2d _special;
+  typename AT::t_int_1d _num_bond;
+  typename AT::t_int_2d _bond_type;
+  typename AT::t_tagint_2d _bond_atom;
+  typename AT::t_int_1d _num_angle;
+  typename AT::t_int_2d _angle_type;
+  typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3;
+
+  // exchange buffer, set up by buffer_view() in the constructor
+  typename AT::t_xfloat_2d_um _buf;
+  // element 0 is the running atom count, advanced atomically
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+  // number of buffer slots reserved per atom
+  size_t elements;
+
+  AtomVecAngleKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi){
+    // same per-atom slot count as used when packing
+    elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
+    // number of whole atoms that fit into the storage of buf
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;
+    // NOTE(review): buffer_view presumably makes _buf a
+    // maxsendlist x elements view onto buf's storage -- confirm
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    // columns 1-3 hold x,y,z, so column _dim+1 is the coordinate along _dim
+    X_FLOAT x = _buf(myrecv,_dim+1);
+    if (x >= _lo && x < _hi) {
+      // reserve the next free atom index
+      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+      // column 0 is skipped; data is read in the order it was packed
+      int m = 1;
+      _x(i,0) = _buf(myrecv,m++);
+      _x(i,1) = _buf(myrecv,m++);
+      _x(i,2) = _buf(myrecv,m++);
+      _v(i,0) = _buf(myrecv,m++);
+      _v(i,1) = _buf(myrecv,m++);
+      _v(i,2) = _buf(myrecv,m++);
+      _tag(i) = _buf(myrecv,m++);
+      _type(i) = _buf(myrecv,m++);
+      _mask(i) = _buf(myrecv,m++);
+      _image(i) = _buf(myrecv,m++);
+
+      _molecule(i) = _buf(myrecv,m++);
+      // each count is stored first and then bounds the loop that follows
+      _num_bond(i) = _buf(myrecv,m++);
+      int k;
+      for (k = 0; k < _num_bond(i); k++) {
+        _bond_type(i,k) = _buf(myrecv,m++);
+        _bond_atom(i,k) = _buf(myrecv,m++);
+      }
+      _num_angle(i) =  _buf(myrecv,m++);
+      for (k = 0; k < _num_angle(i); k++) {
+	_angle_type(i,k) = _buf(myrecv,m++);
+	_angle_atom1(i,k) = _buf(myrecv,m++);
+	_angle_atom2(i,k) = _buf(myrecv,m++);
+	_angle_atom3(i,k) = _buf(myrecv,m++);
+      }
+      _nspecial(i,0) = _buf(myrecv,m++);
+      _nspecial(i,1) = _buf(myrecv,m++);
+      _nspecial(i,2) = _buf(myrecv,m++);
+      for (k = 0; k < _nspecial(i,2); k++)
+        _special(i,k) = _buf(myrecv,m++);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack the atoms in k_buf whose coordinate along dim lies in [lo,hi)
+   with a parallel kernel run on the requested execution space
+   nrecv is divided by the per-atom slot count to get the number of rows
+   returns the atom count held in k_count after the kernel */
+
+int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                              int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                              ExecutionSpace space) {
+  const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
+
+  // the kernel starts counting from the current number of local atoms
+  k_count.h_view(0) = nlocal;
+
+  if (space != Host) {
+    // push the counter to the device, run, then pull the result back
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPDeviceType::fence();
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  } else {
+    AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPHostType::fence();
+  }
+
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack one atom from buf into the host views at index atom->nlocal,
+   let fixes registered in atom->extra_grow unpack their data, and
+   increment atom->nlocal; returns the number of values consumed */
+
+int AtomVecAngleKokkos::unpack_exchange(double *buf)
+{
+  // the new atom goes to the end; grow storage first if it is full
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
+           ANGLE_MASK | SPECIAL_MASK);
+
+  int m = 1;      // slot 0 (the length written by pack_exchange) is skipped
+
+  for (int d = 0; d < 3; d++) h_x(inew,d) = buf[m++];
+  for (int d = 0; d < 3; d++) h_v(inew,d) = buf[m++];
+  h_tag(inew) = (tagint) ubuf(buf[m++]).i;
+  h_type(inew) = (int) ubuf(buf[m++]).i;
+  h_mask(inew) = (int) ubuf(buf[m++]).i;
+  h_image(inew) = (imageint) ubuf(buf[m++]).i;
+  h_molecule(inew) = (tagint) ubuf(buf[m++]).i;
+
+  // bonds: count, then (type, partner) pairs
+  h_num_bond(inew) = (int) ubuf(buf[m++]).i;
+  const int nbond = h_num_bond(inew);
+  for (int ib = 0; ib < nbond; ib++) {
+    h_bond_type(inew,ib) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(inew,ib) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // angles: count, then (type, atom1, atom2, atom3) quadruples
+  h_num_angle(inew) = (int) ubuf(buf[m++]).i;
+  const int nangle = h_num_angle(inew);
+  for (int ia = 0; ia < nangle; ia++) {
+    h_angle_type(inew,ia) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(inew,ia) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(inew,ia) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(inew,ia) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // special neighbors: the 3 counts, then nspecial(inew,2) entries
+  h_nspecial(inew,0) = (int) ubuf(buf[m++]).i;
+  h_nspecial(inew,1) = (int) ubuf(buf[m++]).i;
+  h_nspecial(inew,2) = (int) ubuf(buf[m++]).i;
+  const int nspec = h_nspecial(inew,2);
+  for (int is = 0; is < nspec; is++)
+    h_special(inew,is) = (tagint) ubuf(buf[m++]).i;
+
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->
+      unpack_exchange(inew,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   number of values needed to store all atoms owned by this proc in a
+   restart file: 14 fixed values per atom plus 2 per bond and 4 per angle
+   plus whatever the fixes in atom->extra_restart report per atom
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::size_restart()
+{
+  const int nlocal = atom->nlocal;
+
+  int total = 0;
+  for (int i = 0; i < nlocal; i++)
+    total += 14 + 2*h_num_bond(i) + 4*h_num_angle(i);
+
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++) {
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+  }
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack the data of atom i for a restart file, followed by the data of
+   fixes registered in atom->extra_restart
+   xyz must be the first 3 values, so that read_restart can test on them
+   bond and angle types may be negative, but are written as positive
+   buf[0] receives the total number of values, which is also returned
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_restart(int i, double *buf)
+{
+  int m = 1;      // slot 0 is filled with the total length at the end
+
+  for (int d = 0; d < 3; d++) buf[m++] = h_x(i,d);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  for (int d = 0; d < 3; d++) buf[m++] = h_v(i,d);
+
+  buf[m++] = ubuf(h_molecule(i)).d;
+
+  // bonds: count, then (|type|, partner) pairs
+  const int nbond = h_num_bond(i);
+  buf[m++] = ubuf(nbond).d;
+  for (int ib = 0; ib < nbond; ib++) {
+    buf[m++] = ubuf(MAX(h_bond_type(i,ib),-h_bond_type(i,ib))).d;
+    buf[m++] = ubuf(h_bond_atom(i,ib)).d;
+  }
+
+  // angles: count, then (|type|, atom1, atom2, atom3) quadruples
+  const int nangle = h_num_angle(i);
+  buf[m++] = ubuf(nangle).d;
+  for (int ia = 0; ia < nangle; ia++) {
+    buf[m++] = ubuf(MAX(h_angle_type(i,ia),-h_angle_type(i,ia))).d;
+    buf[m++] = ubuf(h_angle_atom1(i,ia)).d;
+    buf[m++] = ubuf(h_angle_atom2(i,ia)).d;
+    buf[m++] = ubuf(h_angle_atom3(i,ia)).d;
+  }
+
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+   buf has the layout written by pack_restart(): buf[0] is the total
+   number of values for this atom, stored as a plain double
+   the atom is appended at index atom->nlocal, which is then incremented
+   returns the number of values consumed
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::unpack_restart(double *buf)
+{
+  int k;
+
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    // keep the storage for extra fix data as long as the atom arrays
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // special neighbor lists are not part of the restart data
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    // everything between the standard fields and the total length in
+    // buf[0] belongs to the fixes; buf[0] is a plain double, and m must
+    // not be advanced while computing the remaining size
+    int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   append one atom of type itype at position coord
+   all other per-atom values get defaults: tag 0, mask 1, zero velocity,
+   IMGMAX in each of the three image fields, no molecule, bonds, angles
+   or special neighbors
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::create_atom(int itype, double *coord)
+{
+  const int inew = atom->nlocal;
+
+  // all data is marked as modified on Host, also ahead of a grow
+  if (inew == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  tag[inew] = 0;
+  type[inew] = itype;
+
+  for (int d = 0; d < 3; d++) {
+    h_x(inew,d) = coord[d];
+    h_v(inew,d) = 0.0;
+  }
+
+  h_mask(inew) = 1;
+  h_image(inew) = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+
+  h_molecule(inew) = 0;
+  h_num_bond(inew) = 0;
+  h_num_angle(inew) = 0;
+  h_nspecial(inew,0) = h_nspecial(inew,1) = h_nspecial(inew,2) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   append one atom read from a line of the Atoms section of a data file
+   values[0..2] = atom ID, molecule ID, atom type; each is validated
+   coord and imagetmp supply position and image flags
+   mask is set to 1, velocity to zero, bond and angle counts to 0
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::data_atom(double *coord, imageint imagetmp,
+                                  char **values)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+
+  // IDs and type: store first, then validate the stored value
+  h_tag(inew) = atoi(values[0]);
+  if (h_tag(inew) <= 0)
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  h_molecule(inew) = atoi(values[1]);
+  if (h_molecule(inew) <= 0)
+    error->one(FLERR,"Invalid molecule ID in Atoms section of data file");
+
+  h_type(inew) = atoi(values[2]);
+  if (h_type(inew) <= 0 || h_type(inew) > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  for (int d = 0; d < 3; d++) h_x(inew,d) = coord[d];
+
+  h_image(inew) = imagetmp;
+
+  // defaults for everything the data file line does not provide
+  h_mask(inew) = 1;
+  for (int d = 0; d < 3; d++) h_v(inew,d) = 0.0;
+  h_num_bond(inew) = 0;
+  h_num_angle(inew) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  h_num_angle(nlocal) = 0;   // angle counter starts at zero
+  h_num_bond(nlocal) = 0;    // bond counter starts at zero
+  h_molecule(nlocal) = atoi(values[0]);   // only values[0] is read
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::pack_data(double **buf)
+{
+  // row layout: ID, molecule, type, x, y, z, then the 3 decoded image flags
+  const int natoms = atom->nlocal;
+  for (int iatom = 0; iatom < natoms; iatom++) {
+    double *row = buf[iatom];
+    row[0] = h_tag(iatom);
+    row[1] = h_molecule(iatom);
+    row[2] = h_type(iatom);
+    for (int d = 0; d < 3; d++) row[3+d] = h_x(iatom,d);
+    row[6] = (h_image[iatom] & IMGMASK) - IMGMAX;
+    row[7] = (h_image[iatom] >> IMGBITS & IMGMASK) - IMGMAX;
+    row[8] = (h_image[iatom] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::pack_data_hybrid(int i, double *buf)
+{
+  *buf = h_molecule(i);   // the molecule ID is the only value written
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int k = 0; k < n; k++) {   // one output line per buffered row
+    const double *r = buf[k];
+    fprintf(fp,"%d %d %d %-1.16e %-1.16e %-1.16e %d %d %d\n",(int) r[0],
+            (int) r[1],(int) r[2],r[3],r[4],r[5],(int) r[6],(int) r[7],(int) r[8]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecAngleKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," " TAGINT_FORMAT,(tagint) *buf);   // blank, then first buf entry
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecAngleKokkos::memory_usage()
+{
+  const int nbond = atom->bond_per_atom;
+  const int nangle = atom->angle_per_atom;
+  bigint total = 0;
+  // an array is counted only when atom->memcheck() returns nonzero for it
+  if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) total += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) total += memory->usage(image,nmax);
+  if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) total += memory->usage(f,nmax*commKK->nthreads,3);
+  // molecule, special, bond and angle arrays
+  if (atom->memcheck("molecule")) total += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) total += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special"))
+    total += memory->usage(special,nmax,atom->maxspecial);
+  if (atom->memcheck("num_bond")) total += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type"))
+    total += memory->usage(bond_type,nmax,nbond);
+  if (atom->memcheck("bond_atom"))
+    total += memory->usage(bond_atom,nmax,nbond);
+  if (atom->memcheck("num_angle")) total += memory->usage(num_angle,nmax);
+  if (atom->memcheck("angle_type"))
+    total += memory->usage(angle_type,nmax,nangle);
+  if (atom->memcheck("angle_atom1"))
+    total += memory->usage(angle_atom1,nmax,nangle);
+  if (atom->memcheck("angle_atom2"))
+    total += memory->usage(angle_atom2,nmax,nangle);
+  if (atom->memcheck("angle_atom3"))
+    total += memory->usage(angle_atom3,nmax,nangle);
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {   // each set bit in mask selects dual views to sync
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {   // bitwise test, not logical &&
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+  } else {   // any other space value: same selection on the LMPHostType side
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {   // each set bit in mask selects dual views to flag
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {   // bitwise test, not logical &&
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+  } else {   // any other space value: same selection on the LMPHostType side
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..b542761073f3ed7920b9336095c2ca4969f62533
--- /dev/null
+++ b/src/KOKKOS/atom_vec_angle_kokkos.h
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(angle/kk,AtomVecAngleKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_ANGLE_KOKKOS_H
+#define LMP_ATOM_VEC_ANGLE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecAngleKokkos : public AtomVecKokkos {   // atom style "angle/kk"
+ public:
+  AtomVecAngleKokkos(class LAMMPS *);
+  virtual ~AtomVecAngleKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, imageint, char **);   // 2nd arg: encoded image flags
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+  // variants taking DAT:: dual views and, for some, an ExecutionSpace
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+  // mask is a bit set (X_MASK, V_MASK, ...) choosing which fields to touch
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  // plain-pointer per-atom arrays
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  int *num_angle;
+  int **angle_type;
+  tagint **angle_atom1,**angle_atom2,**angle_atom3;
+  // view members: d_* use DAT:: types, h_* use HAT:: types
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  DAT::t_int_1d d_num_angle;
+  DAT::t_int_2d d_angle_type;
+  DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
+
+  HAT::t_int_1d h_num_angle;
+  HAT::t_int_2d h_angle_type;
+  HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
+
+  DAT::tdual_int_1d k_count;
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
index 21255191292a28e30104090950cbd4b78d2306f0..9b06a49149ddd3ed87f2771f075c8a073bb21ceb 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -553,14 +553,15 @@ void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
 
 int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
 {
-  int i,m,last;
-
-  m = 0;
-  last = first + n;
-  for (i = first; i < last; i++) {
-    buf[m++] = f[i][0];
-    buf[m++] = f[i][1];
-    buf[m++] = f[i][2];
+  if(n > 0)
+    sync(Host,F_MASK);
+
+  int m = 0;
+  const int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
   }
   return m;
 }
@@ -569,14 +570,17 @@ int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
 
 void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
 {
-  int i,j,m;
+  if(n > 0) {
+    sync(Host,F_MASK);
+    modified(Host,F_MASK);
+  }
 
-  m = 0;
-  for (i = 0; i < n; i++) {
-    j = list[i];
-    f[j][0] += buf[m++];
-    f[j][1] += buf[m++];
-    f[j][2] += buf[m++];
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    const int j = list[i];
+    h_f(j,0) += buf[m++];
+    h_f(j,1) += buf[m++];
+    h_f(j,2) += buf[m++];
   }
 }
 
@@ -588,11 +592,11 @@ struct AtomVecAtomicKokkos_PackBorder {
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
   const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
   const typename ArrayTypes<DeviceType>::t_int_1d _mask;
-  const int _iswap;
   X_FLOAT _dx,_dy,_dz;
 
   AtomVecAtomicKokkos_PackBorder(
@@ -694,9 +698,9 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
       buf[m++] = h_x(j,0);
       buf[m++] = h_x(j,1);
       buf[m++] = h_x(j,2);
-      buf[m++] = h_tag[j];
-      buf[m++] = h_type[j];
-      buf[m++] = h_mask[j];
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
     }
   } else {
     if (domain->triclinic == 0) {
@@ -713,11 +717,16 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
       buf[m++] = h_x(j,0) + dx;
       buf[m++] = h_x(j,1) + dy;
       buf[m++] = h_x(j,2) + dz;
-      buf[m++] = h_tag[j];
-      buf[m++] = h_type[j];
-      buf[m++] = h_mask[j];
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
     }
   }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
   return m;
 }
 
@@ -736,9 +745,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
       buf[m++] = h_x(j,0);
       buf[m++] = h_x(j,1);
       buf[m++] = h_x(j,2);
-      buf[m++] = h_tag[j];
-      buf[m++] = h_type[j];
-      buf[m++] = h_mask[j];
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
       buf[m++] = h_v(j,0);
       buf[m++] = h_v(j,1);
       buf[m++] = h_v(j,2);
@@ -759,9 +768,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
         buf[m++] = h_x(j,0) + dx;
         buf[m++] = h_x(j,1) + dy;
         buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_tag[j];
-        buf[m++] = h_type[j];
-        buf[m++] = h_mask[j];
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
         buf[m++] = h_v(j,0);
         buf[m++] = h_v(j,1);
         buf[m++] = h_v(j,2);
@@ -775,9 +784,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
         buf[m++] = h_x(j,0) + dx;
         buf[m++] = h_x(j,1) + dy;
         buf[m++] = h_x(j,2) + dz;
-        buf[m++] = h_tag[j];
-        buf[m++] = h_type[j];
-        buf[m++] = h_mask[j];
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
         if (mask[i] & deform_groupbit) {
           buf[m++] = h_v(j,0) + dvx;
           buf[m++] = h_v(j,1) + dvy;
@@ -790,6 +799,11 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
       }
     }
   }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
   return m;
 }
 
@@ -861,10 +875,15 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
-    h_tag[i] = static_cast<int> (buf[m++]);
-    h_type[i] = static_cast<int> (buf[m++]);
-    h_mask[i] = static_cast<int> (buf[m++]);
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
   }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -880,13 +899,18 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
     h_x(i,0) = buf[m++];
     h_x(i,1) = buf[m++];
     h_x(i,2) = buf[m++];
-    h_tag[i] = static_cast<int> (buf[m++]);
-    h_type[i] = static_cast<int> (buf[m++]);
-    h_mask[i] = static_cast<int> (buf[m++]);
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
     h_v(i,0) = buf[m++];
     h_v(i,1) = buf[m++];
     h_v(i,2) = buf[m++];
   }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -895,7 +919,6 @@ template<class DeviceType>
 struct AtomVecAtomicKokkos_PackExchangeFunctor {
   typedef DeviceType device_type;
   typedef ArrayTypes<DeviceType> AT;
-  X_FLOAT _lo,_hi;
   typename AT::t_x_array_randomread _x;
   typename AT::t_v_array_randomread _v;
   typename AT::t_tagint_1d_randomread _tag;
@@ -910,9 +933,10 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
   typename AT::t_imageint_1d _imagew;
 
   typename AT::t_xfloat_2d_um _buf;
-  int _nlocal,_dim;
   typename AT::t_int_1d_const _sendlist;
   typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
 
   AtomVecAtomicKokkos_PackExchangeFunctor(
       const AtomKokkos* atom,
@@ -977,7 +1001,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
 
 int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
 {
-  if(nsend > (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
+  if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
     int newsize = nsend*11/k_buf.view<LMPHostType>().dimension_1()+1;
     k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
   }
@@ -1005,10 +1029,10 @@ int AtomVecAtomicKokkos::pack_exchange(int i, double *buf)
   buf[m++] = h_v(i,0);
   buf[m++] = h_v(i,1);
   buf[m++] = h_v(i,2);
-  buf[m++] = h_tag[i];
-  buf[m++] = h_type[i];
-  buf[m++] = h_mask[i];
-  *((tagint *) &buf[m++]) = h_image[i];
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@@ -1024,7 +1048,6 @@ template<class DeviceType>
 struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
   typedef DeviceType device_type;
   typedef ArrayTypes<DeviceType> AT;
-  X_FLOAT _lo,_hi;
   typename AT::t_x_array _x;
   typename AT::t_v_array _v;
   typename AT::t_tagint_1d _tag;
@@ -1033,8 +1056,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
   typename AT::t_imageint_1d _image;
 
   typename AT::t_xfloat_2d_um _buf;
-  int _dim;
   typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
 
   AtomVecAtomicKokkos_UnpackExchangeFunctor(
       const AtomKokkos* atom,
@@ -1113,10 +1137,10 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
   h_v(nlocal,0) = buf[m++];
   h_v(nlocal,1) = buf[m++];
   h_v(nlocal,2) = buf[m++];
-  h_tag[nlocal] = static_cast<int> (buf[m++]);
-  h_type[nlocal] = static_cast<int> (buf[m++]);
-  h_mask[nlocal] = static_cast<int> (buf[m++]);
-  h_image[nlocal] = static_cast<int> (buf[m++]);
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
 
   if (atom->nextra_grow)
     for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@@ -1159,10 +1183,10 @@ int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
   buf[m++] = h_x(i,0);
   buf[m++] = h_x(i,1);
   buf[m++] = h_x(i,2);
-  buf[m++] = h_tag[i];
-  buf[m++] = h_type[i];
-  buf[m++] = h_mask[i];
-  buf[m++] = h_image[i];
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
   buf[m++] = h_v(i,0);
   buf[m++] = h_v(i,1);
   buf[m++] = h_v(i,2);
@@ -1192,17 +1216,17 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
   h_x(nlocal,0) = buf[m++];
   h_x(nlocal,1) = buf[m++];
   h_x(nlocal,2) = buf[m++];
-  h_tag[nlocal] = static_cast<int> (buf[m++]);
-  h_type[nlocal] = static_cast<int> (buf[m++]);
-  h_mask[nlocal] = static_cast<int> (buf[m++]);
-  h_image[nlocal] = *((tagint *) &buf[m++]);
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
   h_v(nlocal,0) = buf[m++];
   h_v(nlocal,1) = buf[m++];
   h_v(nlocal,2) = buf[m++];
 
   double **extra = atom->extra;
   if (atom->nextra_store) {
-    int size = static_cast<int> (buf[0]) - m;
+    int size = static_cast<int> (ubuf(buf[m++]).i) - m;
     for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
   }
 
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1b2c7823cbd0fdf1865a3f27ceb9f13fd673b11
--- /dev/null
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@@ -0,0 +1,1717 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_bond_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  // cast the atom/comm pointers to their Kokkos classes; make k_count (size 1)
+  atomKK = (AtomKokkos *) atom;
+  commKK = (CommKokkos *) comm;
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+  // flags set to 1 for this style
+  molecular = 1;
+  bonds_allow = 1;
+  mass_type = 1;
+  // comm flags plus the size_* and xcol_data constants
+  comm_x_only = 1;
+  comm_f_only = 1;
+  size_forward = size_reverse = size_velocity = 3;
+  size_border = 7;
+  size_data_atom = 6;
+  size_data_vel = 4;
+  xcol_data = 4;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::grow(int n)
+{
+  // n == 0 extends the current size by DELTA; any other n becomes the size
+  nmax = (n == 0) ? nmax + DELTA : n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  // sync to Device and flag it modified before the dual views are regrown
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+  // molecule, special-neighbor and bond arrays
+  memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule");
+  memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial");
+  memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,"atom:special");
+  memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond");
+  memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,"atom:bond_type");
+  memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom");
+
+  // re-cache the local pointers/views, then sync the Host side
+  grow_reset();
+  sync(Host,ALL_MASK);
+  // every fix index listed in atom->extra_grow gets grow_arrays(nmax)
+  if (atom->nextra_grow)
+    for (int ifix = 0; ifix < atomKK->nextra_grow; ifix++)
+      modify->fix[atom->extra_grow[ifix]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::grow_reset()
+{
+  // plain array pointers cached from atomKK
+  tag = atomKK->tag;
+  type = atomKK->type;
+  mask = atomKK->mask;
+  image = atomKK->image;
+  x = atomKK->x;
+  v = atomKK->v;
+  f = atomKK->f;
+  molecule = atomKK->molecule;
+  nspecial = atomKK->nspecial;
+  special = atomKK->special;
+  num_bond = atomKK->num_bond;
+  bond_type = atomKK->bond_type;
+  bond_atom = atomKK->bond_atom;
+  // d_view member of each matching k_* dual view
+  d_tag = atomKK->k_tag.d_view;
+  d_type = atomKK->k_type.d_view;
+  d_mask = atomKK->k_mask.d_view;
+  d_image = atomKK->k_image.d_view;
+  d_x = atomKK->k_x.d_view;
+  d_v = atomKK->k_v.d_view;
+  d_f = atomKK->k_f.d_view;
+  d_molecule = atomKK->k_molecule.d_view;
+  d_nspecial = atomKK->k_nspecial.d_view;
+  d_special = atomKK->k_special.d_view;
+  d_num_bond = atomKK->k_num_bond.d_view;
+  d_bond_type = atomKK->k_bond_type.d_view;
+  d_bond_atom = atomKK->k_bond_atom.d_view;
+  // h_view member of the same dual views
+  h_tag = atomKK->k_tag.h_view;
+  h_type = atomKK->k_type.h_view;
+  h_mask = atomKK->k_mask.h_view;
+  h_image = atomKK->k_image.h_view;
+  h_x = atomKK->k_x.h_view;
+  h_v = atomKK->k_v.h_view;
+  h_f = atomKK->k_f.h_view;
+  h_molecule = atomKK->k_molecule.h_view;
+  h_nspecial = atomKK->k_nspecial.h_view;
+  h_special = atomKK->k_special.h_view;
+  h_num_bond = atomKK->k_num_bond.h_view;
+  h_bond_type = atomKK->k_bond_type.h_view;
+  h_bond_atom = atomKK->k_bond_atom.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::copy(int i, int j, int delflag)
+{
+  // per-atom scalars
+  h_tag(j) = h_tag(i);
+  h_type(j) = h_type(i);
+  mask[j] = mask[i];
+  h_image(j) = h_image(i);
+  h_molecule(j) = h_molecule(i);
+  // position and velocity components
+  for (int d = 0; d < 3; d++) {
+    h_x(j,d) = h_x(i,d);
+    h_v(j,d) = h_v(i,d);
+  }
+
+  // bond list: only the first num_bond entries are copied
+  const int nbond = h_num_bond(i);
+  h_num_bond(j) = nbond;
+  for (int b = 0; b < nbond; b++) {
+    h_bond_type(j,b) = h_bond_type(i,b);
+    h_bond_atom(j,b) = h_bond_atom(i,b);
+  }
+
+  // special list: its length is taken from nspecial(.,2)
+  for (int d = 0; d < 3; d++) h_nspecial(j,d) = h_nspecial(i,d);
+  const int nspec = h_nspecial(j,2);
+  for (int s = 0; s < nspec; s++) h_special(j,s) = h_special(i,s);
+
+  // pass the same copy request to every fix listed in atom->extra_grow
+  if (atom->nextra_grow)
+    for (int ifix = 0; ifix < atom->nextra_grow; ifix++)
+      modify->fix[atom->extra_grow[ifix]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecBondKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  // stores DeviceType views and box values; _buf is set up by buffer_view()
+  AtomVecBondKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t elements = 3;
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+        for (int k = 0; k < 6; k++) _pbc[k] = pbc[k];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+      return;
+    }
+    if (TRICLINIC == 0) {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    } else {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
+                                        const DAT::tdual_int_2d &list,
+                                        const int & iswap,
+                                        const DAT::tdual_xfloat_2d &buf,
+                                        const int &pbc_flag,
+                                        const int* const pbc)
+{
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+  // Every branch launches the same functor over n entries; pbc_flag and domain->triclinic only select its template arguments.
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);  // called before the host kernel reads k_x
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();  // fence the space the host kernels were launched in
+  } else {
+    sync(Device,X_MASK);  // called before the device kernel reads k_x
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecBondKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+
+	return n*size_forward;  // computed from n and size_forward, not from what the kernel wrote
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecBondKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+  // Functor: copies the position of atom j = _list(_iswap,i) (shifted when PBC_FLAG != 0) into row i+_nfirst of the same x array.
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecBondKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),  // _x and _xw are both taken from the same x view
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];  // the six int pbc entries are stored by value as X_FLOAT
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // no shift: plain copy from row j to row i+_nfirst
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift each component by _pbc[d] times _xprd/_yprd/_zprd
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // as above, plus _xy/_xz terms on x and a _yz term on y
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+										const int nfirst, const int &pbc_flag, const int* const pbc) {  // launches PackCommSelf over n entries on host or device
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);  // the kernel below writes x rows nfirst..nfirst+n-1 on the host
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);  // the kernel below writes x rows nfirst..nfirst+n-1 on the device
+    if(pbc_flag) {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      } else {
+      struct AtomVecBondKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+	return n*3;  // three position components per copied atom
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecBondKokkos_UnpackComm {
+  typedef DeviceType device_type;
+  // Functor: copies the three values in row i of _buf into row i+_first of _x.
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecBondKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),  // both views are taken for the DeviceType space
+                        _first(first) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {  // copies n positions from buf into x rows first..first+n-1
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);  // the host kernel below writes x
+    struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();  // fence the space the kernel ran in, as the pack_* host branches do
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);  // the device kernel below writes x
+    struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_comm(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  // Writes x,y,z of each atom in list[0..n-1] to buf; returns the count written.
+  int nwritten = 0;
+  if (pbc_flag == 0) {
+    for (int idx = 0; idx < n; idx++) {
+      const int src = list[idx];
+      buf[nwritten++] = h_x(src,0);
+      buf[nwritten++] = h_x(src,1);
+      buf[nwritten++] = h_x(src,2);
+    }
+    return nwritten;
+  }
+  // per-axis shift; the triclinic case adds xy/xz terms to x and a yz term to y
+  double shiftx,shifty,shiftz;
+  if (domain->triclinic != 0) {
+    shiftx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    shifty = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+    shiftz = pbc[2]*domain->zprd;
+  } else {
+    shiftx = pbc[0]*domain->xprd;
+    shifty = pbc[1]*domain->yprd;
+    shiftz = pbc[2]*domain->zprd;
+  }
+  for (int idx = 0; idx < n; idx++) {
+    const int src = list[idx];
+    buf[nwritten++] = h_x(src,0) + shiftx;
+    buf[nwritten++] = h_x(src,1) + shifty;
+    buf[nwritten++] = h_x(src,2) + shiftz;
+  }
+  return nwritten;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                     int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+  // Packs 6 doubles (x,y,z,vx,vy,vz) for each atom in list[0..n-1]; returns the count written.
+  m = 0;
+  if (pbc_flag == 0) {  // no periodic shift: raw position and velocity
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: xy/xz terms added to x, yz term added to y
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {  // positions shifted, velocities packed unchanged
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];  // velocity offset from h_rate, same pbc pattern as the triclinic dx
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (mask[j] & deform_groupbit) {  // group test on the packed atom j, not on the send-list slot i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_comm(int n, int first, double *buf)
+{
+  // Reads 3 coordinates per atom from buf into host rows first..first+n-1.
+  const int end = first + n;
+  int pos = 0;
+
+  for (int iatom = first; iatom < end; ++iatom) {
+    h_x(iatom,0) = buf[pos++];
+    h_x(iatom,1) = buf[pos++];
+    h_x(iatom,2) = buf[pos++];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  // Each received atom carries 6 doubles: position (3) then velocity (3).
+  const double *src = buf;
+  const int end = first + n;
+  for (int iatom = first; iatom < end; ++iatom) {
+    h_x(iatom,0) = src[0];
+    h_x(iatom,1) = src[1];
+    h_x(iatom,2) = src[2];
+    h_v(iatom,0) = src[3];
+    h_v(iatom,1) = src[4];
+    h_v(iatom,2) = src[5];
+    src += 6;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_reverse(int n, int first, double *buf)
+{
+  // Request a host sync of the F_MASK data before h_f is read (skipped for n <= 0).
+  if (n > 0) sync(Host,F_MASK);
+
+  // 3 force components per atom, atoms first..first+n-1
+  int count = 0;
+  for (int iatom = first; iatom < first + n; iatom++) {
+    for (int dim = 0; dim < 3; dim++)
+      buf[count++] = h_f(iatom,dim);
+  }
+
+  return count;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // Flag the host F_MASK data as modified whenever something is accumulated.
+  if (n > 0) modified(Host,F_MASK);
+  // add 3 received force components onto each listed atom
+  const double *src = buf;
+  for (int idx = 0; idx < n; idx++, src += 3) {
+    const int dest = list[idx];
+    h_f(dest,0) += src[0];
+    h_f(dest,1) += src[1];
+    h_f(dest,2) += src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecBondKokkos_PackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // Functor: writes 7 values for atom j = _list(_iswap,i) into row i of _buf: x,y,z,tag,type,mask,molecule.
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const int _iswap;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecBondKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),_iswap(iswap),
+      _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // positions copied unshifted
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+          _buf(i,3) = _tag(j);  // integer fields are stored by plain assignment into the buffer element type
+          _buf(i,4) = _type(j);
+          _buf(i,5) = _mask(j);
+          _buf(i,6) = _molecule(j);
+      } else {  // positions shifted by the precomputed _dx,_dy,_dz
+          _buf(i,0) = _x(j,0) + _dx;
+          _buf(i,1) = _x(j,1) + _dy;
+          _buf(i,2) = _x(j,2) + _dz;
+          _buf(i,3) = _tag(j);
+          _buf(i,4) = _type(j);
+          _buf(i,5) = _mask(j);
+          _buf(i,6) = _molecule(j);
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                          DAT::tdual_xfloat_2d buf,int iswap,
+                                          int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+  // Launches PackBorder over n entries in the requested space; the shift is computed here and passed to the functor.
+  if (pbc_flag != 0) {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: the raw pbc values are used as the shift, without scaling
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if(space==Host) {
+      AtomVecBondKokkos_PackBorder<LMPHostType,1> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPHostType::fence();
+    } else {
+      AtomVecBondKokkos_PackBorder<LMPDeviceType,1> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPDeviceType::fence();
+    }
+
+  } else {
+    dx = dy = dz = 0;  // the PBC_FLAG==0 functor does not read the shift; zero is passed regardless
+    if(space==Host) {
+      AtomVecBondKokkos_PackBorder<LMPHostType,0> f(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPHostType::fence();
+    } else {
+      AtomVecBondKokkos_PackBorder<LMPDeviceType,0> f(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,f);
+      LMPDeviceType::fence();
+    }
+  }
+  return n*size_border;  // computed from n and size_border, not from what the kernel wrote
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_border(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz;
+  // Packs 7 doubles per listed atom (x,y,z,tag,type,mask,molecule), then data from fixes; returns the count written.
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;  // integer fields go through ubuf(...).d; unpack_border reads them back with ubuf(...).i
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: the raw pbc values are used as the shift, without scaling
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  }
+  // each fix listed in atom->extra_border appends its data after the atom records
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_border_vel(int n, int *list, double *buf,
+                                       int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+  // Packs 10 doubles per listed atom (x,y,z,tag,type,mask,molecule,vx,vy,vz), then data from fixes; returns the count written.
+  m = 0;
+  if (pbc_flag == 0) {  // no periodic shift: raw position and velocity
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;  // integer fields go through ubuf(...).d; unpack_border_vel reads them back with ubuf(...).i
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: the raw pbc values are used as the shift, without scaling
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {  // positions shifted, velocities packed unchanged
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];  // velocity offset built from h_rate and the pbc flags
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        if (mask[j] & deform_groupbit) {  // group test on the packed atom j, not on the send-list slot i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  // each fix listed in atom->extra_border appends its data after the atom records
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  int i,j,m;
+  // Packs one molecule id per listed atom; returns the number of doubles written.
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = ubuf(h_molecule(j)).d;  // encode via ubuf so unpack_border_hybrid's ubuf(...).i recovers the id
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecBondKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // Functor: reads 7 values from row i of _buf and stores them for atom i+_first: x,y,z,tag,type,mask,molecule.
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_tagint_1d _molecule;
+  int _first;
+
+
+  AtomVecBondKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+    _first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      _tag(i+_first) = static_cast<tagint> (_buf(i,3));  // tagint, as in the (tagint) casts of unpack_border; int would truncate a wider tagint
+      _type(i+_first) = static_cast<int>  (_buf(i,4));
+      _mask(i+_first) = static_cast<int>  (_buf(i,5));
+      _molecule(i+_first) = static_cast<tagint> (_buf(i,6));  // molecule ids are stored as tagint as well
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
+                                             const DAT::tdual_xfloat_2d &buf,
+                                             ExecutionSpace space) {  // unpacks n border atoms from buf into rows first..first+n-1
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);  // NOTE(review): flagged both before and after the grow loop -- confirm both calls are needed
+  while (first+n >= nmax) grow(0);  // keep growing until first+n < nmax
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+  if(space==Host) {
+    struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  } else {
+    struct AtomVecBondKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_border(int n, int first, double *buf)
+{
+  int i,m,last;
+  // Reads 7 doubles per atom from buf into host rows first..first+n-1, then hands the rest of buf to fixes.
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);  // enlarge the arrays when the next row index reaches nmax
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);  // called on every iteration, after any grow
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;  // integer fields are decoded with ubuf(...).i
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+  }
+  // each fix listed in atom->extra_border consumes its data from the remainder of buf
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+  // Reads 10 doubles per atom from buf into host rows first..first+n-1, then hands the rest of buf to fixes.
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);  // NOTE(review): unlike unpack_border, no modified(Host,...) call follows -- confirm whether these host writes must be flagged
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;  // integer fields are decoded with ubuf(...).i
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+  // each fix listed in atom->extra_border consumes its data from the remainder of buf
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  // One ubuf-encoded molecule id per atom; returns the number of doubles consumed.
+  int nread = 0;
+  while (nread < n) {
+    h_molecule(first + nread) = (tagint) ubuf(buf[nread]).i;
+    nread++;
+  }
+  return nread;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecBondKokkos_PackExchangeFunctor {  // packs atom _sendlist(mysend) into row mysend of _buf, then refills its slot from atom _copylist(mysend)
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;  // read-side views of the per-atom arrays
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_tagint_1d_randomread _molecule;
+  typename AT::t_int_2d_randomread _nspecial;
+  typename AT::t_tagint_2d_randomread _special;
+  typename AT::t_int_1d_randomread _num_bond;
+  typename AT::t_int_2d_randomread _bond_type;
+  typename AT::t_tagint_2d_randomread _bond_atom;
+  typename AT::t_x_array _xw;  // write-side views, built from the same atom->k_* members as the read-side ones
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_tagint_1d _moleculew;
+  typename AT::t_int_2d _nspecialw;
+  typename AT::t_tagint_2d _specialw;
+  typename AT::t_int_1d _num_bondw;
+  typename AT::t_int_2d _bond_typew;
+  typename AT::t_tagint_2d _bond_atomw;
+
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+  size_t elements;
+
+  AtomVecBondKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+		_molecule(atom->k_molecule.view<DeviceType>()),
+		_nspecial(atom->k_nspecial.view<DeviceType>()),
+		_special(atom->k_special.view<DeviceType>()),
+		_num_bond(atom->k_num_bond.view<DeviceType>()),
+		_bond_type(atom->k_bond_type.view<DeviceType>()),
+		_bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+		_moleculew(atom->k_molecule.view<DeviceType>()),
+		_nspecialw(atom->k_nspecial.view<DeviceType>()),
+		_specialw(atom->k_special.view<DeviceType>()),
+		_num_bondw(atom->k_num_bond.view<DeviceType>()),
+		_bond_typew(atom->k_bond_type.view<DeviceType>()),
+		_bond_atomw(atom->k_bond_atom.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi){
+    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
+    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
+    // 1 to store buffer length
+    elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);  // presumably makes _buf a maxsendlist x elements view of buf's storage -- TODO confirm against buffer_view
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    int k;
+    const int i = _sendlist(mysend);
+    _buf(mysend,0) = elements;  // slot 0 holds the fixed record length, not the number of entries actually written
+    int m = 1;
+    _buf(mysend,m++) = _x(i,0);
+    _buf(mysend,m++) = _x(i,1);
+    _buf(mysend,m++) = _x(i,2);
+    _buf(mysend,m++) = _v(i,0);
+    _buf(mysend,m++) = _v(i,1);
+    _buf(mysend,m++) = _v(i,2);
+    _buf(mysend,m++) = _tag(i);
+    _buf(mysend,m++) = _type(i);
+    _buf(mysend,m++) = _mask(i);
+    _buf(mysend,m++) = _image(i);
+    _buf(mysend,m++) = _molecule(i);
+    _buf(mysend,m++) = _num_bond(i);
+    for (k = 0; k < _num_bond(i); k++) {  // only the first num_bond (type, atom) pairs are written
+      _buf(mysend,m++) = _bond_type(i,k);
+      _buf(mysend,m++) = _bond_atom(i,k);
+    }
+    _buf(mysend,m++) = _nspecial(i,0);
+    _buf(mysend,m++) = _nspecial(i,1);
+    _buf(mysend,m++) = _nspecial(i,2);
+    for (k = 0; k < _nspecial(i,2); k++)
+      _buf(mysend,m++) = _special(i,k);
+
+    const int j = _copylist(mysend);
+
+    if(j>-1) {  // a non-negative copylist entry names the atom whose data overwrites slot i
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw(i) = _tag(j);
+    _typew(i) = _type(j);
+    _maskw(i) = _mask(j);
+    _imagew(i) = _image(j);
+    _moleculew(i) = _molecule(j);
+    _num_bondw(i) = _num_bond(j);
+    for (k = 0; k < _num_bond(j); k++) {
+      _bond_typew(i,k) = _bond_type(j,k);
+      _bond_atomw(i,k) = _bond_atom(j,k);
+    }
+    _nspecialw(i,0) = _nspecial(j,0);
+    _nspecialw(i,1) = _nspecial(j,1);
+    _nspecialw(i,2) = _nspecial(j,2);
+    for (k = 0; k < _nspecial(j,2); k++)
+      _specialw(i,k) = _special(j,k);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                            DAT::tdual_int_1d k_sendlist,
+                                            DAT::tdual_int_1d k_copylist,
+                                            ExecutionSpace space,int dim,X_FLOAT lo,
+                                            X_FLOAT hi )
+{
+  const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;  // same record length as the functor computes
+  if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*
+	      k_buf.view<LMPHostType>().dimension_1())/elements) {  // buffer holds fewer than nsend records: enlarge it
+    int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1;  // new row count; the column count is kept
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+  if(space == Host) {
+    AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    LMPHostType::fence();
+    return nsend*elements;  // fixed-length records: nsend times elements
+  } else {
+    AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,f);
+    LMPDeviceType::fence();
+    return nsend*elements;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_exchange(int i, double *buf)
+{
+  // Serializes host atom i into buf and returns the record length.
+  // Slot 0 is reserved for that length and is filled in at the end.
+  int pos = 1;
+  for (int dim = 0; dim < 3; dim++) buf[pos++] = h_x(i,dim);
+  for (int dim = 0; dim < 3; dim++) buf[pos++] = h_v(i,dim);
+  // integer fields are written through ubuf(...).d
+  buf[pos++] = ubuf(h_tag(i)).d;
+  buf[pos++] = ubuf(h_type(i)).d;
+  buf[pos++] = ubuf(h_mask(i)).d;
+  buf[pos++] = ubuf(h_image(i)).d;
+  buf[pos++] = ubuf(h_molecule(i)).d;
+
+  // bonds: the count, then one (type, atom) pair per bond
+  buf[pos++] = ubuf(h_num_bond(i)).d;
+  for (int ibond = 0; ibond < h_num_bond(i); ibond++) {
+    buf[pos++] = ubuf(h_bond_type(i,ibond)).d;
+    buf[pos++] = ubuf(h_bond_atom(i,ibond)).d;
+  }
+
+  // special neighbors: three counts, then nspecial(i,2) entries
+  for (int level = 0; level < 3; level++)
+    buf[pos++] = ubuf(h_nspecial(i,level)).d;
+  for (int ispec = 0; ispec < h_nspecial(i,2); ispec++)
+    buf[pos++] = ubuf(h_special(i,ispec)).d;
+
+  // fixes registered in atom->extra_grow append their own data
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    pos += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[pos]);
+
+  buf[0] = pos;
+  return pos;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecBondKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  // per-atom views that receive the unpacked values
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_tagint_1d _molecule;
+  typename AT::t_int_2d _nspecial;
+  typename AT::t_tagint_2d _special;
+  typename AT::t_int_1d _num_bond;
+  typename AT::t_int_2d _bond_type;
+  typename AT::t_tagint_2d _bond_atom;
+
+  typename AT::t_xfloat_2d_um _buf;   // receive buffer indexed as (atom, value)
+  typename AT::t_int_1d _nlocal;      // entry 0 is the running atom count
+  int _dim;                           // coordinate index used for the bounds test
+  X_FLOAT _lo,_hi;                    // accepted interval is [_lo,_hi)
+  size_t elements;                    // buffer values per atom
+
+  AtomVecBondKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi){
+    elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
+    // reinterpret the buffer as nrows records of 'elements' values each
+    const int nrows = (buf.template view<DeviceType>().dimension_0()*
+                       buf.template view<DeviceType>().dimension_1())/elements;
+    buffer_view<DeviceType>(_buf,buf,nrows,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    // only records whose coordinate along _dim lies in [_lo,_hi) are kept
+    const X_FLOAT coord = _buf(myrecv,_dim+1);
+    if (!(coord >= _lo && coord < _hi)) return;
+
+    // claim the next free atom index
+    const int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+
+    int m = 1;
+    for (int d = 0; d < 3; d++) _x(i,d) = _buf(myrecv,m++);
+    for (int d = 0; d < 3; d++) _v(i,d) = _buf(myrecv,m++);
+    _tag(i) = _buf(myrecv,m++);
+    _type(i) = _buf(myrecv,m++);
+    _mask(i) = _buf(myrecv,m++);
+    _image(i) = _buf(myrecv,m++);
+    _molecule(i) = _buf(myrecv,m++);
+
+    // bond count, then one (type,atom) pair per bond
+    _num_bond(i) = _buf(myrecv,m++);
+    const int nbond = _num_bond(i);
+    for (int ib = 0; ib < nbond; ib++) {
+      _bond_type(i,ib) = _buf(myrecv,m++);
+      _bond_atom(i,ib) = _buf(myrecv,m++);
+    }
+
+    // three special counters, then _nspecial(i,2) special entries
+    for (int is = 0; is < 3; is++) _nspecial(i,is) = _buf(myrecv,m++);
+    const int nspec = _nspecial(i,2);
+    for (int is = 0; is < nspec; is++)
+      _special(i,is) = _buf(myrecv,m++);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                              int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                              ExecutionSpace space) {
+  // buffer values per atom, and the number of atom records in nrecv values
+  const size_t elements = 16+atomKK->maxspecial+2*atomKK->bond_per_atom;
+  const size_t nrecords = nrecv/elements;
+
+  // seed the counter with the current atom count; the functor increments it
+  // once for every record it accepts
+  k_count.h_view(0) = nlocal;
+
+  if (space != Host) {
+    // publish the seeded counter to the device copy before the kernel runs
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecords,unpack);
+    LMPDeviceType::fence();
+    // fetch the updated counter back to the host copy
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  } else {
+    AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecords,unpack);
+    LMPHostType::fence();
+  }
+
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::unpack_exchange(double *buf)
+{
+  // the new atom is appended at index atom->nlocal; grow first if full
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
+
+  // slot 0 is not read here; data starts at slot 1
+  int m = 1;
+  for (int d = 0; d < 3; d++) h_x(inew,d) = buf[m++];
+  for (int d = 0; d < 3; d++) h_v(inew,d) = buf[m++];
+
+  h_tag(inew) = (tagint) ubuf(buf[m++]).i;
+  h_type(inew) = (int) ubuf(buf[m++]).i;
+  h_mask(inew) = (int) ubuf(buf[m++]).i;
+  h_image(inew) = (imageint) ubuf(buf[m++]).i;
+  h_molecule(inew) = (tagint) ubuf(buf[m++]).i;
+
+  // bond count, then one (type,atom) pair per bond
+  h_num_bond(inew) = (int) ubuf(buf[m++]).i;
+  const int nbond = h_num_bond(inew);
+  for (int ib = 0; ib < nbond; ib++) {
+    h_bond_type(inew,ib) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(inew,ib) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // three special counters, then nspecial(inew,2) special entries
+  for (int is = 0; is < 3; is++)
+    h_nspecial(inew,is) = (int) ubuf(buf[m++]).i;
+  const int nspec = h_nspecial(inew,2);
+  for (int is = 0; is < nspec; is++)
+    h_special(inew,is) = (tagint) ubuf(buf[m++]).i;
+
+  // let every fix registered in extra_grow consume its own data
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->
+      unpack_exchange(inew,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::size_restart()
+{
+  const int nlocal = atom->nlocal;
+
+  // 13 fixed values per atom plus one (type,atom) pair for each bond
+  int total = 0;
+  for (int i = 0; i < nlocal; i++)
+    total += 13 + 2*h_num_bond[i];
+
+  // add whatever each fix registered in extra_restart stores per atom
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_restart(int i, double *buf)
+{
+  // slot 0 is filled last with the record length; data starts at slot 1
+  int m = 1;
+
+  for (int d = 0; d < 3; d++) buf[m++] = h_x(i,d);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  for (int d = 0; d < 3; d++) buf[m++] = h_v(i,d);
+
+  buf[m++] = ubuf(h_molecule(i)).d;
+
+  // bond count, then per bond the absolute value of its type and its atom
+  const int nbond = h_num_bond(i);
+  buf[m++] = ubuf(nbond).d;
+  for (int ib = 0; ib < nbond; ib++) {
+    buf[m++] = ubuf(MAX(h_bond_type(i,ib),-h_bond_type(i,ib))).d;
+    buf[m++] = ubuf(h_bond_atom(i,ib)).d;
+  }
+
+  // let every fix registered in extra_restart append its own data
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::unpack_restart(double *buf)
+{
+  // Read one atom record produced by pack_restart() and append it at index
+  // atom->nlocal.  Returns the number of buffer values consumed.
+  int k;
+
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    // keep the per-atom extra storage the same length as the atom arrays
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  // slot 0 holds the record length; data starts at slot 1
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // special-neighbor counts are not part of the restart record
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    // pack_restart() stores the total record length as a plain double in
+    // buf[0]; everything after the values read so far is extra data.
+    // (The previous expression reinterpreted the first extra value as an
+    // integer, skipped it, and read m unsequenced with its own increment.)
+    const int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::create_atom(int itype, double *coord)
+{
+  // the new atom is appended at index atom->nlocal
+  const int inew = atom->nlocal;
+  if (inew == nmax) {
+    // flag host data as modified before the arrays are grown
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  // identity
+  tag[inew] = 0;
+  type[inew] = itype;
+  h_mask(inew) = 1;
+  h_image(inew) = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+
+  // position from the caller, zero velocity
+  for (int d = 0; d < 3; d++) {
+    h_x(inew,d) = coord[d];
+    h_v(inew,d) = 0.0;
+  }
+
+  // no molecule, bonds or special neighbors yet
+  h_molecule(inew) = 0;
+  h_num_bond(inew) = 0;
+  h_nspecial(inew,0) = h_nspecial(inew,1) = h_nspecial(inew,2) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::data_atom(double *coord, imageint imagetmp,
+                                  char **values)
+{
+  // the new atom is appended at index atomKK->nlocal; grow first if full
+  const int inew = atomKK->nlocal;
+  if (inew == nmax) grow(0);
+
+  // values[0..2] = atom ID, molecule ID, atom type; each is validated
+  // right after it is parsed so the first bad field is the one reported
+  // NOTE(review): atoi() yields an int; if tagint can be wider than int
+  // in this build, IDs would be truncated here - confirm
+  h_tag(inew) = atoi(values[0]);
+  if (h_tag(inew) <= 0)
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  // NOTE(review): a molecule ID of 0 is rejected here - confirm intended
+  h_molecule(inew) = atoi(values[1]);
+  if (h_molecule(inew) <= 0)
+    error->one(FLERR,"Invalid molecule ID in Atoms section of data file");
+
+  h_type(inew) = atoi(values[2]);
+  if (h_type(inew) <= 0 || h_type(inew) > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  // position and image flags come from the caller, velocity starts at zero
+  for (int d = 0; d < 3; d++) {
+    h_x(inew,d) = coord[d];
+    h_v(inew,d) = 0.0;
+  }
+  h_image(inew) = imagetmp;
+
+  h_mask(inew) = 1;
+  h_num_bond(inew) = 0;
+
+  atomKK->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  // this sub-style consumes exactly one value: the molecule ID
+  const int nconsumed = 1;
+
+  h_num_bond(nlocal) = 0;
+  h_molecule(nlocal) = atoi(values[0]);
+
+  return nconsumed;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::pack_data(double **buf)
+{
+  // one row per owned atom:
+  // ID, molecule, type, x, y, z, then the three unpacked image flags
+  const int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    double *row = buf[i];
+    row[0] = h_tag(i);
+    row[1] = h_molecule(i);
+    row[2] = h_type(i);
+    for (int d = 0; d < 3; d++) row[3+d] = h_x(i,d);
+
+    // split the packed image word into its three fields, each offset by IMGMAX
+    const imageint img = h_image[i];
+    row[6] = (img & IMGMASK) - IMGMAX;
+    row[7] = ((img >> IMGBITS) & IMGMASK) - IMGMAX;
+    row[8] = (img >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::pack_data_hybrid(int i, double *buf)
+{
+  // this sub-style contributes exactly one value: the molecule ID
+  const int nwritten = 1;
+  buf[0] = h_molecule(i);
+  return nwritten;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  // Write n rows to fp: atom ID, molecule ID, type, x, y, z and the three
+  // image flags.  Columns 0 and 1 are tagint quantities, so they are
+  // printed with TAGINT_FORMAT after a tagint cast (as write_data_hybrid
+  // does) instead of being narrowed to int.
+  for (int i = 0; i < n; i++)
+    fprintf(fp,TAGINT_FORMAT " " TAGINT_FORMAT
+            " %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) buf[i][0],(tagint) buf[i][1],(int) buf[i][2],
+            buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecBondKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  // append the molecule ID, preceded by a blank, to the current line
+  const tagint molecule_id = (tagint) (buf[0]);
+  fprintf(fp," " TAGINT_FORMAT, molecule_id);
+
+  // one buffer value was consumed
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecBondKokkos::memory_usage()
+{
+  // sum memory->usage() over every per-atom array that atom->memcheck()
+  // reports for its name
+  bigint total = 0;
+
+  // arrays of length nmax
+  if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) total += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) total += memory->usage(image,nmax);
+
+  // 3-vectors; the force array is sized nmax*commKK->nthreads rows
+  if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) total += memory->usage(f,nmax*commKK->nthreads,3);
+
+  // molecular data
+  if (atom->memcheck("molecule")) total += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) total += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special")) {
+    total += memory->usage(special,nmax,atom->maxspecial);
+  }
+
+  // bond data
+  if (atom->memcheck("num_bond")) total += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type")) {
+    total += memory->usage(bond_type,nmax,atom->bond_per_atom);
+  }
+  if (atom->memcheck("bond_atom")) {
+    total += memory->usage(bond_atom,nmax,atom->bond_per_atom);
+  }
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  // Call sync<>() on the dual views selected by mask, targeting the device
+  // copy when space == Device and the host copy otherwise.
+  // Every group is selected with a bitwise test of its flag.  The molecule,
+  // special and bond groups previously used a logical '&&', which selected
+  // them for any non-zero mask.
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  // Call modify<>() on the dual views selected by mask, flagging the device
+  // copy when space == Device and the host copy otherwise.
+  // Every group is selected with a bitwise test of its flag.  The molecule,
+  // special and bond groups previously used a logical '&&', which flagged
+  // them as modified for any non-zero mask.
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..676cd1202c91d941f5f74dd3997c7c8d997251f8
--- /dev/null
+++ b/src/KOKKOS/atom_vec_bond_kokkos.h
@@ -0,0 +1,141 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(bond/kk,AtomVecBondKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_BOND_KOKKOS_H
+#define LMP_ATOM_VEC_BOND_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+// Atom style registered as "bond/kk": per-atom storage and communication
+// routines for atoms carrying molecule, bond and special-neighbor data,
+// kept in Kokkos views with separate host (h_*) and device (d_*) handles.
+class AtomVecBondKokkos : public AtomVecKokkos {
+ public:
+  AtomVecBondKokkos(class LAMMPS *);
+  virtual ~AtomVecBondKokkos() {}
+
+  // storage management and per-atom copy
+  void grow(int);
+  void copy(int, int, int);
+
+  // buffer-based forward/reverse/border communication
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+
+  // atom exchange and restart records
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+
+  // atom creation and data-file I/O
+  void create_atom(int, double *);
+  // second argument is the packed image flag word; declared as imageint to
+  // match the definition of this method
+  void data_atom(double *, imageint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+
+  // refresh the cached pointers and view handles declared below
+  void grow_reset();
+
+  // Kokkos variants operating on dual-view buffers
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+
+  // call sync<>() / modify<>() on the dual views selected by mask
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+
+  // raw pointers to the per-atom arrays
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  // device (d_*) and host (h_*) view handles for the same quantities
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  // atom counter shared with the unpack-exchange kernel
+  DAT::tdual_int_1d k_count;
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..97f489fb5861b07ec3bed975a709bb3fc8b78ae0
--- /dev/null
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -0,0 +1,1517 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_charge_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  // Kokkos-aware handles to the atom and comm objects
+  atomKK = (AtomKokkos *) atom;
+  commKK = (CommKokkos *) comm;
+
+  // style flags
+  molecular = 0;
+  mass_type = 1;
+  atom->q_flag = 1;
+
+  // communication settings and per-atom buffer sizes
+  comm_x_only = 1;
+  comm_f_only = 1;
+  size_forward = 3;
+  size_reverse = 3;
+  size_border = 7;
+  size_velocity = 3;
+
+  // data-file layout
+  size_data_atom = 6;
+  size_data_vel = 4;
+  xcol_data = 4;
+
+  // single-entry dual view used as a counter
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::grow(int n)
+{
+  // n == 0 extends the current capacity by DELTA, otherwise n is the new size
+  nmax = (n == 0) ? nmax + DELTA : n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  // sync everything to the device and flag it modified there before resizing
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+
+  // per-atom scalars
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  // per-atom 3-vectors
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+
+  // charge
+  memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
+
+  // refresh cached pointers/views, then sync back to the host
+  grow_reset();
+  sync(Host,ALL_MASK);
+
+  // let every fix registered in extra_grow resize its per-atom arrays
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::grow_reset()
+{
+  // raw array pointers
+  tag = atomKK->tag;
+  type = atomKK->type;
+  mask = atomKK->mask;
+  image = atomKK->image;
+  x = atomKK->x;
+  v = atomKK->v;
+  f = atomKK->f;
+  q = atomKK->q;
+
+  // device views
+  d_tag = atomKK->k_tag.d_view;
+  d_type = atomKK->k_type.d_view;
+  d_mask = atomKK->k_mask.d_view;
+  d_image = atomKK->k_image.d_view;
+  d_x = atomKK->k_x.d_view;
+  d_v = atomKK->k_v.d_view;
+  d_f = atomKK->k_f.d_view;
+  d_q = atomKK->k_q.d_view;
+
+  // host views
+  h_tag = atomKK->k_tag.h_view;
+  h_type = atomKK->k_type.h_view;
+  h_mask = atomKK->k_mask.h_view;
+  h_image = atomKK->k_image.h_view;
+  h_x = atomKK->k_x.h_view;
+  h_v = atomKK->k_v.h_view;
+  h_f = atomKK->k_f.h_view;
+  h_q = atomKK->k_q.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::copy(int i, int j, int delflag)
+{
+  // position and velocity
+  for (int d = 0; d < 3; d++) {
+    h_x(j,d) = h_x(i,d);
+    h_v(j,d) = h_v(i,d);
+  }
+
+  // identity fields and charge (mask goes through the raw pointer)
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];
+  h_image[j] = h_image[i];
+  h_q[j] = h_q[i];
+
+  // let every fix registered in extra_grow copy its per-atom data as well
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecChargeKokkos_PackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;  // source coordinates
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;      // output, 3 values per atom
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;     // send list indexed (swap, slot)
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecChargeKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+    // view the buffer as maxsend rows of three coordinates each
+    const size_t elements = 3;
+    const size_t maxsend =
+      (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/elements;
+    buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+
+    // keep a private copy of the six periodic-image multipliers
+    for (int k = 0; k < 6; k++) _pbc[k] = pbc[k];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    // slot i of the send list names the atom whose position is packed
+    const int j = _list(_iswap,i);
+
+    if (PBC_FLAG == 0) {
+      // no periodic shift
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else if (TRICLINIC == 0) {
+      // shift by whole box lengths
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    } else {
+      // shift that also includes the tilt factors
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_xfloat_2d &buf,
+                                          const int &pbc_flag,
+                                          const int* const pbc)
+{
+  // Pick the execution space from commKK->forward_comm_on_host, then the
+  // PackComm instantiation from <pbc_flag != 0, domain->triclinic != 0>.
+  // Coordinates are synced to the chosen space first and the kernel is
+  // fenced before returning.
+
+  if (!commKK->forward_comm_on_host) {
+    sync(Device,X_MASK);
+    if (domain->triclinic) {
+      if (pbc_flag) {
+        AtomVecChargeKokkos_PackComm<LMPDeviceType,1,1> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        AtomVecChargeKokkos_PackComm<LMPDeviceType,0,1> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    } else {
+      if (pbc_flag) {
+        AtomVecChargeKokkos_PackComm<LMPDeviceType,1,0> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        AtomVecChargeKokkos_PackComm<LMPDeviceType,0,0> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    }
+    LMPDeviceType::fence();
+  } else {
+    sync(Host,X_MASK);
+    if (domain->triclinic) {
+      if (pbc_flag) {
+        AtomVecChargeKokkos_PackComm<LMPHostType,1,1> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        AtomVecChargeKokkos_PackComm<LMPHostType,0,1> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    } else {
+      if (pbc_flag) {
+        AtomVecChargeKokkos_PackComm<LMPHostType,1,0> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        AtomVecChargeKokkos_PackComm<LMPHostType,0,0> pack(atomKK->k_x,buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    }
+    LMPHostType::fence();
+  }
+
+  // number of buffer values written
+  return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecChargeKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+
+  // _x and _xw are both built from the same view of x:
+  // _x is used for reads, _xw for writes
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecChargeKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),
+      _xw(x.view<DeviceType>()),
+      _nfirst(nfirst),
+      _list(list.view<DeviceType>()),
+      _iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+    // store the six pbc entries by value inside the functor
+    for (int k = 0; k < 6; k++) _pbc[k] = pbc[k];
+  }
+
+  // copy (optionally shifted) coordinates of atom _list(_iswap,i)
+  // into row i+_nfirst of the same coordinate array
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int src = _list(_iswap,i);
+    const int dst = i+_nfirst;
+
+    if (PBC_FLAG == 0) {
+      _xw(dst,0) = _x(src,0);
+      _xw(dst,1) = _x(src,1);
+      _xw(dst,2) = _x(src,2);
+      return;
+    }
+
+    if (TRICLINIC == 0) {
+      _xw(dst,0) = _x(src,0) + _pbc[0]*_xprd;
+      _xw(dst,1) = _x(src,1) + _pbc[1]*_yprd;
+      _xw(dst,2) = _x(src,2) + _pbc[2]*_zprd;
+    } else {
+      _xw(dst,0) = _x(src,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _xw(dst,1) = _x(src,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _xw(dst,2) = _x(src,2) + _pbc[2]*_zprd;
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                                        const int & iswap, const int nfirst,
+                                        const int &pbc_flag, const int* const pbc) {
+  // Launch the self-copy kernel over n entries in the space chosen by
+  // commKK->forward_comm_on_host; x is synced and then flagged as modified
+  // in that space before the launch.  Returns n*3.
+
+  const bool shifted = (pbc_flag != 0);
+
+  if (!commKK->forward_comm_on_host) {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    if (shifted && domain->triclinic) {
+      AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,1> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else if (shifted) {
+      AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,1,0> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else if (domain->triclinic) {
+      AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,1> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else {
+      AtomVecChargeKokkos_PackCommSelf<LMPDeviceType,0,0> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    }
+    LMPDeviceType::fence();
+  } else {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    if (shifted && domain->triclinic) {
+      AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,1> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else if (shifted) {
+      AtomVecChargeKokkos_PackCommSelf<LMPHostType,1,0> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else if (domain->triclinic) {
+      AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,1> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    } else {
+      AtomVecChargeKokkos_PackCommSelf<LMPHostType,0,0> copy(atomKK->k_x,nfirst,list,iswap,
+        domain->xprd,domain->yprd,domain->zprd,
+        domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,copy);
+    }
+    LMPHostType::fence();
+  }
+
+  return n*3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecChargeKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecChargeKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):
+    _x(x.view<DeviceType>()),
+    _buf(buf.view<DeviceType>()),
+    _first(first) {}
+
+  // row i of the buffer becomes the coordinates of atom i+_first
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int dst = i+_first;
+    for (int d = 0; d < 3; d++)
+      _x(dst,d) = _buf(i,d);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  // Unpack n rows of buf into x, starting at atom index first.
+  // Runs on the host when commKK->forward_comm_on_host is set, otherwise
+  // on the device; x is synced to and flagged as modified in that space.
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    // fence the space the kernel was launched in (was LMPDeviceType)
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_comm(int n, int *list, double *buf,
+                             int pbc_flag, int *pbc)
+{
+  // Host-side pack of x for the n atoms in list; returns the number of
+  // doubles written to buf.
+  int m = 0;
+
+  // no shift requested: copy coordinates unchanged
+  if (pbc_flag == 0) {
+    for (int k = 0; k < n; k++) {
+      const int j = list[k];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+    }
+    return m;
+  }
+
+  // one shift vector for the whole list, built from pbc and the box
+  double dx,dy,dz;
+  if (domain->triclinic) {
+    dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+    dz = pbc[2]*domain->zprd;
+  } else {
+    dx = pbc[0]*domain->xprd;
+    dy = pbc[1]*domain->yprd;
+    dz = pbc[2]*domain->zprd;
+  }
+
+  for (int k = 0; k < n; k++) {
+    const int j = list[k];
+    buf[m++] = h_x(j,0) + dx;
+    buf[m++] = h_x(j,1) + dy;
+    buf[m++] = h_x(j,2) + dz;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  // Host-side pack of x and v for the n atoms in list.
+  // With pbc_flag set, x is shifted by a box-derived offset; if
+  // deform_vremap is also set, v of atoms in the deform group is shifted
+  // by an offset built from h_rate.
+  // Returns the number of doubles written to buf.
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        // group membership must be tested for the atom being packed (j),
+        // not for the send-list position (i)
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_comm(int n, int first, double *buf)
+{
+  // Host-side unpack: consecutive triples of buf become x of atoms
+  // first .. first+n-1.
+  const int last = first + n;
+  int m = 0;
+  int i = first;
+  while (i < last) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    i++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  // Host-side unpack: each group of six doubles in buf holds x then v
+  // for one of the atoms first .. first+n-1.
+  const int last = first + n;
+  int m = 0;
+  int i = first;
+  while (i < last) {
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+    i++;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_reverse(int n, int first, double *buf)
+{
+  // Copy f of atoms first .. first+n-1 into buf; returns the number of
+  // doubles written.  The host copy of f is synced only when there is
+  // something to pack.
+  if (n > 0) sync(Host,F_MASK);
+
+  const int last = first + n;
+  int m = 0;
+  int i = first;
+  while (i < last) {
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
+    i++;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // Accumulate consecutive triples of buf into f of the atoms named in
+  // list.  The host copy of f is flagged as modified only when n > 0.
+  if (n > 0) modified(Host,F_MASK);
+
+  int m = 0;
+  for (int k = 0; k < n; k++) {
+    const int j = list[k];
+    h_f(j,0) += buf[m++];
+    h_f(j,1) += buf[m++];
+    h_f(j,2) += buf[m++];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecChargeKokkos_PackBorder {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
+  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  const typename ArrayTypes<DeviceType>::t_int_1d _type;
+  const typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  const typename ArrayTypes<DeviceType>::t_float_1d _q;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecChargeKokkos_PackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
+      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
+      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_x_array &x,
+      const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      const typename ArrayTypes<DeviceType>::t_int_1d &type,
+      const typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      const typename ArrayTypes<DeviceType>::t_float_1d &q,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+    _buf(buf),
+    _list(list),
+    _iswap(iswap),
+    _x(x),
+    _tag(tag),
+    _type(type),
+    _mask(mask),
+    _q(q),
+    _dx(dx),_dy(dy),_dz(dz) {}
+
+  // row i of the buffer: x (columns 0-2), tag, type, mask, q (columns 3-6)
+  // of atom _list(_iswap,i); x is shifted by (_dx,_dy,_dz) when PBC_FLAG != 0
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    }
+
+    // the remaining columns do not depend on PBC_FLAG
+    _buf(i,3) = _tag(j);
+    _buf(i,4) = _type(j);
+    _buf(i,5) = _mask(j);
+    _buf(i,6) = _q(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  // Launch the border pack kernel over n entries in the requested space.
+  // The coordinate shift is zero without pbc_flag; otherwise it is
+  // pbc[]*box length for an orthogonal box and pbc[] itself for a
+  // triclinic one.  Returns n*size_border.
+
+  const bool shifted = (pbc_flag != 0);
+
+  X_FLOAT dx,dy,dz;
+  if (!shifted) {
+    dx = dy = dz = 0;
+  } else if (domain->triclinic == 0) {
+    dx = pbc[0]*domain->xprd;
+    dy = pbc[1]*domain->yprd;
+    dz = pbc[2]*domain->zprd;
+  } else {
+    dx = pbc[0];
+    dy = pbc[1];
+    dz = pbc[2];
+  }
+
+  if (space==Host) {
+    if (shifted) {
+      AtomVecChargeKokkos_PackBorder<LMPHostType,1> pack(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecChargeKokkos_PackBorder<LMPHostType,0> pack(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPHostType::fence();
+  } else {
+    if (shifted) {
+      AtomVecChargeKokkos_PackBorder<LMPDeviceType,1> pack(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecChargeKokkos_PackBorder<LMPDeviceType,0> pack(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPDeviceType::fence();
+  }
+
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_border(int n, int *list, double *buf,
+                               int pbc_flag, int *pbc)
+{
+  // Host-side border pack: per listed atom x, tag, type, mask (integers
+  // stored through ubuf) and q, followed by whatever the registered
+  // extra-border fixes append.  Returns the number of doubles written.
+  int m = 0;
+
+  if (pbc_flag != 0) {
+    double dx,dy,dz;
+    if (domain->triclinic) {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    } else {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    }
+    for (int k = 0; k < n; k++) {
+      const int j = list[k];
+      buf[m++] = h_x(j,0) + dx;
+      buf[m++] = h_x(j,1) + dy;
+      buf[m++] = h_x(j,2) + dz;
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+    }
+  } else {
+    for (int k = 0; k < n; k++) {
+      const int j = list[k];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+    }
+  }
+
+  // the loop runs zero times when no extra-border fixes are registered
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_border_vel(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  // Host-side border pack including velocities: per listed atom x, tag,
+  // type, mask (integers stored through ubuf), q and v, followed by
+  // whatever the registered extra-border fixes append.
+  // With pbc_flag set, x is shifted; if deform_vremap is also set, v of
+  // atoms in the deform group is shifted by an offset built from h_rate.
+  // Returns the number of doubles written to buf.
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q[j];
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q[j];
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q[j];
+        // group membership must be tested for the atom being packed (j),
+        // not for the send-list position (i)
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  // Pack only q for the n listed atoms; returns the number of doubles
+  // written to buf.
+  int m = 0;
+  while (m < n) {
+    buf[m] = h_q[list[m]];
+    m++;
+  }
+  return m;
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that unpacks border data: row i of _buf holds x (columns 0-2),
+// tag, type, mask (columns 3-5) and q (column 6) for atom i+_first.
+template<class DeviceType>
+struct AtomVecChargeKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+
+  const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
+  typename ArrayTypes<DeviceType>::t_int_1d _type;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_float_1d _q;
+  int _first;
+
+
+  AtomVecChargeKokkos_UnpackBorder(
+      const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf,
+      typename ArrayTypes<DeviceType>::t_x_array &x,
+      typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
+      typename ArrayTypes<DeviceType>::t_int_1d &type,
+      typename ArrayTypes<DeviceType>::t_int_1d &mask,
+      typename ArrayTypes<DeviceType>::t_float_1d &q,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      // convert to tagint, not int: _tag is a tagint view and an int cast
+      // would truncate ids whenever tagint is wider than int
+      _tag(i+_first) = static_cast<tagint> (_buf(i,3));
+      _type(i+_first) = static_cast<int>  (_buf(i,4));
+      _mask(i+_first) = static_cast<int>  (_buf(i,5));
+      _q(i+_first) = _buf(i,6);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  // Unpack n border rows of buf into atoms first .. first+n-1 in the
+  // requested space, then flag the written fields as modified there.
+
+  // grow with some headroom once the last index reaches nmax
+  const int needed = first+n;
+  if (needed >= nmax) grow(needed+100);
+
+  if (space!=Host) {
+    AtomVecChargeKokkos_UnpackBorder<LMPDeviceType>
+      unpack(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
+    Kokkos::parallel_for(n,unpack);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecChargeKokkos_UnpackBorder<LMPHostType>
+      unpack(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
+    Kokkos::parallel_for(n,unpack);
+    LMPHostType::fence();
+  }
+
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf)
+{
+  // Host-side border unpack into atoms first .. first+n-1: x, then tag,
+  // type, mask (read back through ubuf) and q.  Extra-border fixes then
+  // unpack from the position where the per-atom data ended.
+  const int last = first + n;
+  int m = 0;
+
+  for (int i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // flagged on every iteration, after any grow() for this index
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q[i] = buf[m++];
+  }
+
+  // the loop runs zero times when no extra-border fixes are registered
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->
+      unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  // Host-side border unpack including velocities into atoms
+  // first .. first+n-1: x, then tag, type, mask (read back through ubuf),
+  // q and v.  Extra-border fixes then unpack from the position where the
+  // per-atom data ended.
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // flag every host field written below as modified, as unpack_border()
+    // does; kept inside the loop so it follows any grow() for this index
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q[i] = buf[m++];
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  // Read one q value per atom for atoms first .. first+n-1; returns the
+  // number of doubles consumed from buf.
+  const int last = first + n;
+  int m = 0;
+  int i = first;
+  while (i < last) {
+    h_q[i] = buf[m++];
+    i++;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecChargeKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  // handles used for reading per-atom data
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_float_1d_randomread _q;
+
+  // handles on the same views, used for writing
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_float_1d _qw;
+
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecChargeKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+      X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _qw(atom->k_q.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi)
+  {
+    // expose buf as rows of 12 values, as many rows as its storage holds
+    const size_t elements = 12;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+                             buf.template view<DeviceType>().dimension_1())/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  // pack atom _sendlist(mysend) into row mysend (column 0 holds the row
+  // length), then, if _copylist(mysend) is non-negative, overwrite that
+  // atom's slot with the data of atom _copylist(mysend)
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    const int i = _sendlist(mysend);
+
+    _buf(mysend,0) = 12;
+    _buf(mysend,1) = _x(i,0);
+    _buf(mysend,2) = _x(i,1);
+    _buf(mysend,3) = _x(i,2);
+    _buf(mysend,4) = _v(i,0);
+    _buf(mysend,5) = _v(i,1);
+    _buf(mysend,6) = _v(i,2);
+    _buf(mysend,7) = _tag[i];
+    _buf(mysend,8) = _type[i];
+    _buf(mysend,9) = _mask[i];
+    _buf(mysend,10) = _image[i];
+    _buf(mysend,11) = _q[i];
+
+    const int j = _copylist(mysend);
+    if (j < 0) return;
+
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw(i) = _tag(j);
+    _typew(i) = _type(j);
+    _maskw(i) = _mask(j);
+    _imagew(i) = _image(j);
+    _qw(i) = _q(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                              DAT::tdual_int_1d k_sendlist,
+                                              DAT::tdual_int_1d k_copylist,
+                                              ExecutionSpace space,int dim,
+                                              X_FLOAT lo,X_FLOAT hi )
+{
+  // Pack nsend atoms (12 values each) into k_buf in the requested space,
+  // enlarging k_buf first if it cannot hold them.  Returns nsend*12.
+
+  const int capacity =
+    (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/12;
+  if (capacity < nsend) {
+    // keep the second extent and add rows
+    const int newsize = nsend*12/k_buf.view<LMPHostType>().dimension_1()+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+
+  if (space != Host) {
+    AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType>
+      pack(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,pack);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
+      pack(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,pack);
+    LMPHostType::fence();
+  }
+
+  return nsend*12;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_exchange(int i, double *buf)
+{
+  // Pack atom i: buf[0] receives the total length, buf[1..11] hold x, v,
+  // tag, type, mask, image (integers stored through ubuf) and q, and the
+  // registered extra-grow fixes append after that.  Returns the length.
+  buf[1] = h_x(i,0);
+  buf[2] = h_x(i,1);
+  buf[3] = h_x(i,2);
+  buf[4] = h_v(i,0);
+  buf[5] = h_v(i,1);
+  buf[6] = h_v(i,2);
+  buf[7] = ubuf(h_tag(i)).d;
+  buf[8] = ubuf(h_type(i)).d;
+  buf[9] = ubuf(h_mask(i)).d;
+  buf[10] = ubuf(h_image(i)).d;
+  buf[11] = h_q[i];
+
+  int m = 12;
+  // the loop runs zero times when no extra-grow fixes are registered
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecChargeKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_float_1d _q;
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+
+  AtomVecChargeKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),
+    _dim(dim),
+    _lo(lo),_hi(hi)
+  {
+    // expose buf as rows of 12 values, as many rows as its storage holds
+    const size_t elements = 12;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+                             buf.template view<DeviceType>().dimension_1())/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  // keep row myrecv only if its coordinate along _dim lies in [_lo,_hi);
+  // the destination index is claimed atomically from the counter _nlocal(0)
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    const X_FLOAT coord = _buf(myrecv,_dim+1);
+    if (!(coord >= _lo && coord < _hi)) return;
+
+    const int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+    _x(i,0) = _buf(myrecv,1);
+    _x(i,1) = _buf(myrecv,2);
+    _x(i,2) = _buf(myrecv,3);
+    _v(i,0) = _buf(myrecv,4);
+    _v(i,1) = _buf(myrecv,5);
+    _v(i,2) = _buf(myrecv,6);
+    _tag[i] = _buf(myrecv,7);
+    _type[i] = _buf(myrecv,8);
+    _mask[i] = _buf(myrecv,9);
+    _image[i] = _buf(myrecv,10);
+    _q[i] = _buf(myrecv,11);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                ExecutionSpace space) {
+  // Unpack nrecv/12 rows of k_buf in the requested space.  k_count(0) is
+  // seeded with nlocal and incremented by the kernel for every accepted
+  // row; its final host value is returned.
+
+  k_count.h_view(0) = nlocal;
+
+  if (space != Host) {
+    // publish the seed to the device before the kernel ...
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/12,unpack);
+    LMPDeviceType::fence();
+    // ... and bring the updated counter back afterwards
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  } else {
+    AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/12,unpack);
+    LMPHostType::fence();
+  }
+
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_exchange(double *buf)
+{
+  // Append one atom from buf at index atom->nlocal (growing storage if
+  // that index equals nmax), let the extra-grow fixes unpack their part,
+  // then increment atom->nlocal.  Returns the number of doubles consumed,
+  // counting the leading slot buf[0].
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | Q_MASK);
+
+  h_x(inew,0) = buf[1];
+  h_x(inew,1) = buf[2];
+  h_x(inew,2) = buf[3];
+  h_v(inew,0) = buf[4];
+  h_v(inew,1) = buf[5];
+  h_v(inew,2) = buf[6];
+  h_tag(inew) = (tagint) ubuf(buf[7]).i;
+  h_type(inew) = (int) ubuf(buf[8]).i;
+  h_mask(inew) = (int) ubuf(buf[9]).i;
+  h_image(inew) = (imageint) ubuf(buf[10]).i;
+  h_q[inew] = buf[11];
+
+  int m = 12;
+  // the loop runs zero times when no extra-grow fixes are registered
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->
+      unpack_exchange(inew,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::size_restart()
+{
+  // fixed part: 12 values for every local atom
+  const int nlocal = atom->nlocal;
+  int total = 12 * nlocal;
+
+  // plus whatever each restart-storing fix reports per atom
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++) {
+    const int ifix = atom->extra_restart[iextra];
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[ifix]->size_restart(i);
+  }
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_restart(int i, double *buf)
+{
+  // fixed part of the record lives in buf[1..11]; buf[0] is written last
+  for (int d = 0; d < 3; d++) {
+    buf[1+d] = h_x(i,d);
+    buf[8+d] = h_v(i,d);
+  }
+  buf[4] = ubuf(h_tag(i)).d;
+  buf[5] = ubuf(h_type(i)).d;
+  buf[6] = ubuf(h_mask(i)).d;
+  buf[7] = ubuf(h_image(i)).d;
+  buf[11] = h_q[i];
+  int nvalues = 12;
+
+  // every restart-storing fix appends its own per-atom values
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    nvalues += modify->fix[atom->extra_restart[iextra]]->
+      pack_restart(i,&buf[nvalues]);
+
+  // the total length, counting this slot, goes in front
+  buf[0] = nvalues;
+  return nvalues;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::unpack_restart(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store)  // keep atom->extra sized to the new nmax as well
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+  // layout mirrors pack_restart(): buf[0] = record length, then x, IDs, v, q
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_q[nlocal] = buf[m++];
+  // values past the fixed part (record length in buf[0] minus m) go to atom->extra
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    int size = static_cast<int> (buf[0]) - m;  // buf[0] is a plain double, not a ubuf
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;  // number of buffer values consumed for this atom
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::create_atom(int itype, double *coord)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) {
+    // mark host data modified before growing (presumably so grow() keeps it)
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  // all writes below touch host data
+  atomKK->modified(Host,ALL_MASK);
+
+  tag[inew] = 0;
+  type[inew] = itype;
+  h_mask[inew] = 1;
+  h_q[inew] = 0.0;
+
+  // image flags: IMGMAX packed into each of the three bit fields
+  const imageint imgmax = (imageint) IMGMAX;
+  h_image[inew] = (imgmax << IMG2BITS) | (imgmax << IMGBITS) | IMGMAX;
+
+  // position from the caller, zero velocity
+  for (int d = 0; d < 3; d++) {
+    h_x(inew,d) = coord[d];
+    h_v(inew,d) = 0.0;
+  }
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::data_atom(double *coord, imageint imagetmp,
+                                    char **values)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+
+  h_tag[inew] = atoi(values[0]);   // column 0: atom ID, must be positive
+  if (!(tag[inew] > 0))
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  h_type[inew] = atoi(values[1]);  // column 1: type, must be in 1..ntypes
+  if (!(type[inew] > 0 && type[inew] <= atom->ntypes))
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  h_q[inew] = atof(values[2]);     // column 2: charge
+
+  // image flags and position come from the caller
+  h_image[inew] = imagetmp;
+  for (int d = 0; d < 3; d++)
+    h_x(inew,d) = coord[d];
+
+  // mask starts at 1 and the velocity at zero
+  h_mask[inew] = 1;
+  for (int d = 0; d < 3; d++)
+    h_v(inew,d) = 0.0;
+
+  // the atom now counts as local
+  atom->nlocal++;
+}
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  const double charge = atof(values[0]);  // only hybrid column is the charge
+  h_q[nlocal] = charge;
+  return 1;                               // one value consumed
+}
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::pack_data(double **buf)
+{
+  const int nrows = atom->nlocal;
+  for (int i = 0; i < nrows; i++) {   // row: ID type q x y z ix iy iz
+    double *row = buf[i];
+    const imageint img = h_image[i];
+    row[0] = h_tag[i];
+    row[1] = h_type[i];
+    row[2] = h_q[i];
+    for (int d = 0; d < 3; d++) row[3+d] = h_x(i,d);
+    row[6] = (img & IMGMASK) - IMGMAX;
+    row[7] = (img >> IMGBITS & IMGMASK) - IMGMAX;
+    row[8] = (img >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::pack_data_hybrid(int i, double *buf)
+{
+  *buf = h_q[i];   // charge is the only hybrid value
+  return 1;        // number of values written
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::write_data(FILE *fp, int n, double **buf)
+{  // one text line per row: ID type q x y z ix iy iz
+  for (double **row = buf, **last = buf + (n > 0 ? n : 0); row != last; row++)
+    fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (int) (*row)[0],(int) (*row)[1],(*row)[2],(*row)[3],(*row)[4],(*row)[5],
+            (int) (*row)[6],(int) (*row)[7],(int) (*row)[8]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecChargeKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," %-1.16e",*buf);   // append the charge column
+  return 1;                      // one value consumed from buf
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecChargeKokkos::memory_usage()
+{
+  bigint total = 0;
+  // an array is counted only when atom->memcheck() returns nonzero for its name
+  total += atom->memcheck("tag") ? memory->usage(tag,nmax) : 0;
+  total += atom->memcheck("type") ? memory->usage(type,nmax) : 0;
+  total += atom->memcheck("mask") ? memory->usage(mask,nmax) : 0;
+  total += atom->memcheck("image") ? memory->usage(image,nmax) : 0;
+  total += atom->memcheck("x") ? memory->usage(x,nmax,3) : 0;
+  total += atom->memcheck("v") ? memory->usage(v,nmax,3) : 0;
+  // f is counted with nmax*nthreads rows
+  total += atom->memcheck("f") ? memory->usage(f,nmax*commKK->nthreads,3) : 0;
+  total += atom->memcheck("q") ? memory->usage(q,nmax) : 0;
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask)
+{  // sync, on the requested side, each dual view whose bit is set in mask
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPDeviceType>();  // bitwise test; && fired for any nonzero mask
+  } else {  // any space other than Device is handled as the host
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPHostType>();  // bitwise test; && fired for any nonzero mask
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask)
+{  // mark as modified, on the requested side, each dual view whose bit is set in mask
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPDeviceType>();  // bitwise test; && flagged q for any nonzero mask
+  } else {  // any space other than Device is handled as the host
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();  // bitwise test; && flagged q for any nonzero mask
+  }
+}
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..98f0f363b96d04b5c646423ae6ded6e22407fb5d
--- /dev/null
+++ b/src/KOKKOS/atom_vec_charge_kokkos.h
@@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(charge/kk,AtomVecChargeKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_CHARGE_KOKKOS_H
+#define LMP_ATOM_VEC_CHARGE_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecChargeKokkos : public AtomVecKokkos {  // registered above as atom style charge/kk
+ public:
+  AtomVecChargeKokkos(class LAMMPS *);
+  virtual ~AtomVecChargeKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, imageint, char **);  // 2nd argument is the image flag, as in the definition
+  int data_atom_hybrid(int , char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+  // Kokkos-specific entry points operating on dual-view buffers and send lists
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+  // per-array sync / mark-modified on the given side, selected by a bit mask
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  double *q;
+  // d_* members use DAT view types and h_* members use HAT view types
+  DAT::t_tagint_1d d_tag;
+  HAT::t_tagint_1d h_tag;
+
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_float_1d d_q;
+
+  HAT::t_float_1d h_q;
+  // one-element counter passed to the exchange unpack kernel
+  DAT::tdual_int_1d k_count;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..684c81c616ea561a14b19a48b409e9deca0f1604
--- /dev/null
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -0,0 +1,2321 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_full_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  // Kokkos-typed handles to the shared Atom and Comm objects
+  atomKK = (AtomKokkos *) atom;
+  commKK = (CommKokkos *) comm;
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+
+  // style flags: molecular, per-type mass, all bonded interactions, charge
+  molecular = 1;
+  mass_type = 1;
+  bonds_allow = 1;      angles_allow = 1;
+  dihedrals_allow = 1;  impropers_allow = 1;
+  atom->molecule_flag = 1;  atom->q_flag = 1;
+
+  // communication flags and record sizes
+  comm_x_only = 1;   comm_f_only = 1;
+  size_forward = 3;  size_reverse = 3;  size_velocity = 3;
+  size_border = 8;
+  size_data_atom = 7;  size_data_vel = 4;
+  xcol_data = 5;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;  // DELTA is 10000 (file-level #define)
+  else nmax = n;
+  atomKK->nmax = nmax;
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+  // sync the device side and flag it modified before the grow_kokkos calls below
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+  // tag, type, mask, image: nmax entries each
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+  // x, v, f: nmax x 3
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+  // charge and molecule ID
+  memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
+  memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule");
+  // special-neighbor data and bonds (second dimension taken from atomKK)
+  memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial");
+  memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,
+		      "atom:special");
+  memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond");
+  memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,
+		      "atom:bond_type");
+  memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,
+		      "atom:bond_atom");
+  // angles: nmax x angle_per_atom
+  memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle");
+  memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom,
+		      "atom:angle_type");
+  memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom1");
+  memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom2");
+  memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom3");
+  // dihedrals: nmax x dihedral_per_atom
+  memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral");
+  memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_type");
+  memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom1");
+  memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom2");
+  memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom3");
+  memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom4");
+  // impropers: nmax x improper_per_atom
+  memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper");
+  memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax,
+                      atomKK->improper_per_atom,"atom:improper_type");
+  memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom1");
+  memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom2");
+  memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom3");
+  memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom4");
+  // re-bind this class's cached pointers and views, then sync the host side
+  grow_reset();
+  sync(Host,ALL_MASK);
+  // fixes registered in atom->extra_grow grow their own arrays to nmax
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::grow_reset()
+{
+  tag = atomKK->tag;               // raw pointer held by atomKK
+  d_tag = atomKK->k_tag.d_view;    // d_view member of the dual view
+  h_tag = atomKK->k_tag.h_view;    // h_view member of the dual view
+  // the same three-way rebinding is repeated for every per-atom array below
+  type = atomKK->type;
+  d_type = atomKK->k_type.d_view;
+  h_type = atomKK->k_type.h_view;
+  mask = atomKK->mask;
+  d_mask = atomKK->k_mask.d_view;
+  h_mask = atomKK->k_mask.h_view;
+  image = atomKK->image;
+  d_image = atomKK->k_image.d_view;
+  h_image = atomKK->k_image.h_view;
+  // positions, velocities, forces
+  x = atomKK->x;
+  d_x = atomKK->k_x.d_view;
+  h_x = atomKK->k_x.h_view;
+  v = atomKK->v;
+  d_v = atomKK->k_v.d_view;
+  h_v = atomKK->k_v.h_view;
+  f = atomKK->f;
+  d_f = atomKK->k_f.d_view;
+  h_f = atomKK->k_f.h_view;
+  // charge
+  q = atomKK->q;
+  d_q = atomKK->k_q.d_view;
+  h_q = atomKK->k_q.h_view;
+  // molecule ID
+  molecule = atomKK->molecule;
+  d_molecule = atomKK->k_molecule.d_view;
+  h_molecule = atomKK->k_molecule.h_view;
+  // special-neighbor counts and lists
+  nspecial = atomKK->nspecial;
+  d_nspecial = atomKK->k_nspecial.d_view;
+  h_nspecial = atomKK->k_nspecial.h_view;
+  special = atomKK->special;
+  d_special = atomKK->k_special.d_view;
+  h_special = atomKK->k_special.h_view;
+  // bonds
+  num_bond = atomKK->num_bond;
+  d_num_bond = atomKK->k_num_bond.d_view;
+  h_num_bond = atomKK->k_num_bond.h_view;
+  bond_type = atomKK->bond_type;
+  d_bond_type = atomKK->k_bond_type.d_view;
+  h_bond_type = atomKK->k_bond_type.h_view;
+  bond_atom = atomKK->bond_atom;
+  d_bond_atom = atomKK->k_bond_atom.d_view;
+  h_bond_atom = atomKK->k_bond_atom.h_view;
+  // angles
+  num_angle = atomKK->num_angle;
+  d_num_angle = atomKK->k_num_angle.d_view;
+  h_num_angle = atomKK->k_num_angle.h_view;
+  angle_type = atomKK->angle_type;
+  d_angle_type = atomKK->k_angle_type.d_view;
+  h_angle_type = atomKK->k_angle_type.h_view;
+  angle_atom1 = atomKK->angle_atom1;
+  d_angle_atom1 = atomKK->k_angle_atom1.d_view;
+  h_angle_atom1 = atomKK->k_angle_atom1.h_view;
+  angle_atom2 = atomKK->angle_atom2;
+  d_angle_atom2 = atomKK->k_angle_atom2.d_view;
+  h_angle_atom2 = atomKK->k_angle_atom2.h_view;
+  angle_atom3 = atomKK->angle_atom3;
+  d_angle_atom3 = atomKK->k_angle_atom3.d_view;
+  h_angle_atom3 = atomKK->k_angle_atom3.h_view;
+  // dihedrals
+  num_dihedral = atomKK->num_dihedral;
+  d_num_dihedral = atomKK->k_num_dihedral.d_view;
+  h_num_dihedral = atomKK->k_num_dihedral.h_view;
+  dihedral_type = atomKK->dihedral_type;
+  d_dihedral_type = atomKK->k_dihedral_type.d_view;
+  h_dihedral_type = atomKK->k_dihedral_type.h_view;
+  dihedral_atom1 = atomKK->dihedral_atom1;
+  d_dihedral_atom1 = atomKK->k_dihedral_atom1.d_view;
+  h_dihedral_atom1 = atomKK->k_dihedral_atom1.h_view;
+  dihedral_atom2 = atomKK->dihedral_atom2;
+  d_dihedral_atom2 = atomKK->k_dihedral_atom2.d_view;
+  h_dihedral_atom2 = atomKK->k_dihedral_atom2.h_view;
+  dihedral_atom3 = atomKK->dihedral_atom3;
+  d_dihedral_atom3 = atomKK->k_dihedral_atom3.d_view;
+  h_dihedral_atom3 = atomKK->k_dihedral_atom3.h_view;
+  dihedral_atom4 = atomKK->dihedral_atom4;
+  d_dihedral_atom4 = atomKK->k_dihedral_atom4.d_view;
+  h_dihedral_atom4 = atomKK->k_dihedral_atom4.h_view;
+  // impropers
+  num_improper = atomKK->num_improper;
+  d_num_improper = atomKK->k_num_improper.d_view;
+  h_num_improper = atomKK->k_num_improper.h_view;
+  improper_type = atomKK->improper_type;
+  d_improper_type = atomKK->k_improper_type.d_view;
+  h_improper_type = atomKK->k_improper_type.h_view;
+  improper_atom1 = atomKK->improper_atom1;
+  d_improper_atom1 = atomKK->k_improper_atom1.d_view;
+  h_improper_atom1 = atomKK->k_improper_atom1.h_view;
+  improper_atom2 = atomKK->improper_atom2;
+  d_improper_atom2 = atomKK->k_improper_atom2.d_view;
+  h_improper_atom2 = atomKK->k_improper_atom2.h_view;
+  improper_atom3 = atomKK->improper_atom3;
+  d_improper_atom3 = atomKK->k_improper_atom3.d_view;
+  h_improper_atom3 = atomKK->k_improper_atom3.h_view;
+  improper_atom4 = atomKK->improper_atom4;
+  d_improper_atom4 = atomKK->k_improper_atom4.d_view;
+  h_improper_atom4 = atomKK->k_improper_atom4.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::copy(int i, int j, int delflag)
+{
+  // ID, type, mask, image flags, charge and molecule ID
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];
+  h_image[j] = h_image[i];
+  h_q[j] = h_q[i];
+  h_molecule(j) = h_molecule(i);
+  // position, velocity and the three special-neighbor counts
+  for (int d = 0; d < 3; d++) {
+    h_x(j,d) = h_x(i,d);
+    h_v(j,d) = h_v(i,d);
+    h_nspecial(j,d) = h_nspecial(i,d);
+  }
+
+  // special list: the first h_nspecial(j,2) entries
+  for (int k = 0, nspec = h_nspecial(j,2); k < nspec; k++)
+    h_special(j,k) = h_special(i,k);
+
+  // bonds
+  h_num_bond(j) = h_num_bond(i);
+  for (int k = 0, nbond = h_num_bond(j); k < nbond; k++) {
+    h_bond_type(j,k) = h_bond_type(i,k);
+    h_bond_atom(j,k) = h_bond_atom(i,k);
+  }
+
+  // angles
+  h_num_angle(j) = h_num_angle(i);
+  for (int k = 0, nangle = h_num_angle(j); k < nangle; k++) {
+    h_angle_type(j,k) = h_angle_type(i,k);
+    h_angle_atom1(j,k) = h_angle_atom1(i,k);
+    h_angle_atom2(j,k) = h_angle_atom2(i,k);
+    h_angle_atom3(j,k) = h_angle_atom3(i,k);
+  }
+
+  // dihedrals
+  h_num_dihedral(j) = h_num_dihedral(i);
+  for (int k = 0, ndihedral = h_num_dihedral(j); k < ndihedral; k++) {
+    h_dihedral_type(j,k) = h_dihedral_type(i,k);
+    h_dihedral_atom1(j,k) = h_dihedral_atom1(i,k);
+    h_dihedral_atom2(j,k) = h_dihedral_atom2(i,k);
+    h_dihedral_atom3(j,k) = h_dihedral_atom3(i,k);
+    h_dihedral_atom4(j,k) = h_dihedral_atom4(i,k);
+  }
+
+  // impropers
+  h_num_improper(j) = h_num_improper(i);
+  for (int k = 0, nimproper = h_num_improper(j); k < nimproper; k++) {
+    h_improper_type(j,k) = h_improper_type(i,k);
+    h_improper_atom1(j,k) = h_improper_atom1(i,k);
+    h_improper_atom2(j,k) = h_improper_atom2(i,k);
+    h_improper_atom3(j,k) = h_improper_atom3(i,k);
+    h_improper_atom4(j,k) = h_improper_atom4(i,k);
+  }
+
+  // fixes registered in atom->extra_grow copy their own per-atom data
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecFullKokkos_PackComm {
+  typedef DeviceType device_type;
+  // source coordinates, destination buffer, send list and the swap row used in it
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // the six pbc integers, stored as X_FLOAT
+  // takes the DeviceType views of x and list and copies the box values and pbc[0..5]
+  AtomVecFullKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()
+				*buf.view<DeviceType>().dimension_1())/3;
+        const size_t elements = 3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);  // presumably sets _buf up as maxsend x 3 over buf's storage; helper not shown here
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+  // work item i: write x of atom j = _list(_iswap,i) into row i of _buf
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // no periodic shift: plain copy
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift each component by pbc * box length
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // x and y additionally pick up the xy/xz and yz terms
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
+                                             const DAT::tdual_int_2d &list,
+                                             const int & iswap,
+                                             const DAT::tdual_xfloat_2d &buf,
+                                             const int &pbc_flag,
+                                             const int* const pbc)
+{
+  // Pack on the device unless forward communication is pinned to the host.
+  // PBC_FLAG and TRICLINIC are template arguments of the pack kernel.
+
+  if (!commKK->forward_comm_on_host) {
+    sync(Device,X_MASK);
+    if (domain->triclinic) {
+      if (pbc_flag) {
+        struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,1>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,1>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    } else {
+      if (pbc_flag) {
+        struct AtomVecFullKokkos_PackComm<LMPDeviceType,1,0>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        struct AtomVecFullKokkos_PackComm<LMPDeviceType,0,0>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    }
+    LMPDeviceType::fence();
+  } else {
+    sync(Host,X_MASK);
+    if (domain->triclinic) {
+      if (pbc_flag) {
+        struct AtomVecFullKokkos_PackComm<LMPHostType,1,1>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        struct AtomVecFullKokkos_PackComm<LMPHostType,0,1>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    } else {
+      if (pbc_flag) {
+        struct AtomVecFullKokkos_PackComm<LMPHostType,1,0>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      } else {
+        struct AtomVecFullKokkos_PackComm<LMPHostType,0,0>
+          pack(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+               domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,pack);
+      }
+    }
+    LMPHostType::fence();
+  }
+
+  return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecFullKokkos_PackCommSelf {
+  typedef DeviceType device_type;
+  // _x and _xw are both built from the same x dual view: _x is read, _xw is written
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;  // offset added to the destination row index
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // the six pbc integers, stored as X_FLOAT
+  // takes the DeviceType views of x and list and copies the box values and pbc[0..5]
+  AtomVecFullKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+    _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz) {
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+  // work item i: write x of atom j = _list(_iswap,i) into row i+_nfirst of the same array
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // no periodic shift: plain copy
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift each component by pbc * box length
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // x and y additionally pick up the xy/xz and yz terms
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                                           const int & iswap,
+                                           const int nfirst, const int &pbc_flag,
+                                           const int* const pbc) {
+  // Self-exchange of positions for swap iswap: every listed atom is copied
+  // into rows nfirst..nfirst+n-1 of k_x, with the periodic shift added when
+  // pbc_flag is set.  The work runs on the host or on the device according
+  // to commKK->forward_comm_on_host; the return value is n*3.
+  if (commKK->forward_comm_on_host) {
+    // the functor both reads and writes rows of x
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    // template arguments are <space, PBC_FLAG, TRICLINIC>
+    if (pbc_flag && domain->triclinic) {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,1,1>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else if (pbc_flag) {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,1,0>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else if (domain->triclinic) {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,0,1>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecFullKokkos_PackCommSelf<LMPHostType,0,0>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPHostType::fence();
+  } else {
+    // same four-way dispatch, instantiated for the device
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    // template arguments are <space, PBC_FLAG, TRICLINIC>
+    if (pbc_flag && domain->triclinic) {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,1>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else if (pbc_flag) {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,1,0>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else if (domain->triclinic) {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,1>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecFullKokkos_PackCommSelf<LMPDeviceType,0,0>
+        pack(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+             domain->xy,domain->xz,domain->yz,pbc);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPDeviceType::fence();
+  }
+  return n*3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_UnpackComm {
+  // Functor: row i of the receive buffer becomes the position of atom i+_first.
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array _x;
+  typename AT::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecFullKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):
+    _x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),_first(first) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int iatom = i+_first;
+    for (int d = 0; d < 3; d++) _x(iatom,d) = _buf(i,d);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {  // unpack n rows of buf into x of atoms first..first+n-1
+  if(commKK->forward_comm_on_host) {  // host path
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();  // wait on the host space the kernel ran in (was LMPDeviceType)
+  } else {  // device path
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm(int n, int *list, double *buf,
+                                      int pbc_flag, int *pbc)
+{
+  // Host-side forward-comm pack: three coordinates per listed atom are
+  // appended to buf; the return value is the number of doubles written.
+  int nbuf = 0;
+  if (!pbc_flag) {
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      buf[nbuf++] = h_x(iatom,0);
+      buf[nbuf++] = h_x(iatom,1);
+      buf[nbuf++] = h_x(iatom,2);
+    }
+    return nbuf;
+  }
+  // periodic image: one shift vector serves the whole list
+  double shift[3];
+  if (domain->triclinic != 0) {
+    shift[0] = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    shift[1] = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+  } else {
+    shift[0] = pbc[0]*domain->xprd;
+    shift[1] = pbc[1]*domain->yprd;
+  }
+  shift[2] = pbc[2]*domain->zprd;
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    buf[nbuf++] = h_x(iatom,0) + shift[0];
+    buf[nbuf++] = h_x(iatom,1) + shift[1];
+    buf[nbuf++] = h_x(iatom,2) + shift[2];
+  }
+  return nbuf;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                          int pbc_flag, int *pbc)
+{  // host pack of position + velocity (6 doubles per listed atom); returns doubles written
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;  // write cursor into buf
+  if (pbc_flag == 0) {  // no periodic image: copy x and v unchanged
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {  // position shift for the periodic image
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {  // shift positions only
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {  // additionally shift velocities of atoms in the deform group
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (mask[j] & deform_groupbit) {  // group test on the packed atom j (was mask[i], the loop counter)
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm(int n, int first, double *buf)
+{
+  // Copies n packed coordinate triples from buf into the host positions
+  // of atoms first .. first+n-1.
+  const double *src = buf;
+  const int end = first + n;
+  for (int iatom = first; iatom < end; ++iatom, src += 3) {
+    h_x(iatom,0) = src[0];
+    h_x(iatom,1) = src[1];
+    h_x(iatom,2) = src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  // Each received atom occupies six doubles: position, then velocity.
+  // They are written to host atoms first .. first+n-1.
+  const double *src = buf;
+  const int end = first + n;
+  for (int iatom = first; iatom < end; ++iatom, src += 6) {
+    h_x(iatom,0) = src[0];
+    h_x(iatom,1) = src[1];
+    h_x(iatom,2) = src[2];
+    h_v(iatom,0) = src[3];
+    h_v(iatom,1) = src[4];
+    h_v(iatom,2) = src[5];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_reverse(int n, int first, double *buf)
+{
+  // Packs the forces of atoms first..first+n-1 (three values each) into buf
+  // and returns the number of values written.
+  // The sync call is skipped for an empty request.
+  if (n > 0) sync(Host,F_MASK);
+
+  int nbuf = 0;
+  for (int k = 0; k < n; k++) {
+    const int iatom = first + k;
+    for (int d = 0; d < 3; d++) buf[nbuf++] = h_f(iatom,d);
+  }
+  return nbuf;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  // Accumulates received force contributions (three per entry) onto the
+  // host forces of the atoms named in list.
+  if (n > 0) modified(Host,F_MASK);
+  const double *src = buf;
+  for (int k = 0; k < n; k++, src += 3) {
+    const int iatom = list[k];
+    h_f(iatom,0) += src[0];
+    h_f(iatom,1) += src[1];
+    h_f(iatom,2) += src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecFullKokkos_PackBorder {
+  // Functor: writes atom _list(_iswap,i) into buffer row i as eight values:
+  // x, y, z (shifted by _dx,_dy,_dz when PBC_FLAG is non-zero), tag, type,
+  // mask, q and molecule.
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const int _iswap;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_float_1d _q;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecFullKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_float_1d &q,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+    _buf(buf),_list(list),_iswap(iswap),
+    _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
+    _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+    // coordinates: only the PBC variant adds the shift
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    }
+    // remaining fields are identical for both variants
+    _buf(i,3) = ubuf(_tag(j)).d;
+    _buf(i,4) = ubuf(_type(j)).d;
+    _buf(i,5) = ubuf(_mask(j)).d;
+    _buf(i,6) = _q(j);
+    _buf(i,7) = ubuf(_molecule(j)).d;
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                               DAT::tdual_xfloat_2d buf,int iswap,
+                                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  // Packs n border atoms of swap iswap into buf in the requested execution
+  // space and returns n*size_border.
+  X_FLOAT dx = 0, dy = 0, dz = 0;
+  if (pbc_flag != 0) {
+    if (domain->triclinic != 0) {  // triclinic: pbc values are used as they are
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    } else {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    }
+  }
+  // the second template argument of the functor mirrors pbc_flag
+  if (space == Host) {
+    if (pbc_flag != 0) {
+      AtomVecFullKokkos_PackBorder<LMPHostType,1> pack(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecFullKokkos_PackBorder<LMPHostType,0> pack(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPHostType::fence();
+  } else {
+    if (pbc_flag != 0) {
+      AtomVecFullKokkos_PackBorder<LMPDeviceType,1> pack(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    } else {
+      AtomVecFullKokkos_PackBorder<LMPDeviceType,0> pack(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,pack);
+    }
+    LMPDeviceType::fence();
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border(int n, int *list, double *buf,
+                                        int pbc_flag, int *pbc)
+{
+  // Host-side border pack: eight doubles per listed atom (x, y, z, tag, type,
+  // mask, q, molecule), followed by whatever the fixes indexed by
+  // atom->extra_border append.  Returns the number of doubles written.
+  int nbuf = 0;
+  if (pbc_flag != 0) {
+    double shift[3];
+    if (domain->triclinic != 0) {
+      shift[0] = pbc[0];
+      shift[1] = pbc[1];
+      shift[2] = pbc[2];
+    } else {
+      shift[0] = pbc[0]*domain->xprd;
+      shift[1] = pbc[1]*domain->yprd;
+      shift[2] = pbc[2]*domain->zprd;
+    }
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      buf[nbuf++] = h_x(iatom,0) + shift[0];
+      buf[nbuf++] = h_x(iatom,1) + shift[1];
+      buf[nbuf++] = h_x(iatom,2) + shift[2];
+      buf[nbuf++] = ubuf(h_tag(iatom)).d;
+      buf[nbuf++] = ubuf(h_type(iatom)).d;
+      buf[nbuf++] = ubuf(h_mask(iatom)).d;
+      buf[nbuf++] = h_q(iatom);
+      buf[nbuf++] = ubuf(h_molecule(iatom)).d;
+    }
+  } else {
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      buf[nbuf++] = h_x(iatom,0);
+      buf[nbuf++] = h_x(iatom,1);
+      buf[nbuf++] = h_x(iatom,2);
+      buf[nbuf++] = ubuf(h_tag(iatom)).d;
+      buf[nbuf++] = ubuf(h_type(iatom)).d;
+      buf[nbuf++] = ubuf(h_mask(iatom)).d;
+      buf[nbuf++] = h_q(iatom);
+      buf[nbuf++] = ubuf(h_molecule(iatom)).d;
+    }
+  }
+
+  for (int k = 0; k < atom->nextra_border; k++)
+    nbuf += modify->fix[atom->extra_border[k]]->pack_border(n,list,&buf[nbuf]);
+
+  return nbuf;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_vel(int n, int *list, double *buf,
+                                            int pbc_flag, int *pbc)
+{  // host border pack with velocities: 11 doubles per listed atom, then per-fix extras; returns doubles written
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;  // write cursor into buf
+  if (pbc_flag == 0) {  // no periodic image: copy everything unchanged
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = h_q(j);
+      buf[m++] = ubuf(h_molecule(j)).d;
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {  // position shift for the periodic image
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {  // triclinic: pbc values are used as they are
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {  // shift positions only
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q(j);
+        buf[m++] = ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {  // additionally shift velocities of atoms in the deform group
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = h_q(j);
+        buf[m++] = ubuf(h_molecule(j)).d;
+        if (mask[j] & deform_groupbit) {  // group test on the packed atom j (was mask[i], the loop counter)
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  if (atom->nextra_border)  // let each listed fix append its own border data
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  // Writes only the charge and the molecule ID of each listed atom
+  // (two doubles per atom) and returns the number of doubles written.
+  double *out = buf;
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    *out++ = h_q(iatom);
+    *out++ = ubuf(h_molecule(iatom)).d;
+  }
+  return static_cast<int>(out - buf);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_UnpackBorder {
+  // Functor: row i of the receive buffer is decoded into atom i+_first.
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_float_1d _q;
+  typename AT::t_tagint_1d _molecule;
+  int _first;
+
+  AtomVecFullKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_float_1d &q,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
+    _first(first) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int iatom = i+_first;
+    _x(iatom,0) = _buf(i,0);
+    _x(iatom,1) = _buf(i,1);
+    _x(iatom,2) = _buf(i,2);
+    // integer fields are read back through the union's integer member
+    _tag(iatom) = static_cast<tagint>(ubuf(_buf(i,3)).i);
+    _type(iatom) = static_cast<int>(ubuf(_buf(i,4)).i);
+    _mask(iatom) = static_cast<int>(ubuf(_buf(i,5)).i);
+    _molecule(iatom) = static_cast<tagint>(ubuf(_buf(i,7)).i);
+    _q(iatom) = _buf(i,6);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
+                                                  const DAT::tdual_xfloat_2d &buf,
+                                                  ExecutionSpace space) {  // decode n border rows of buf into atoms first..first+n-1
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);  // NOTE(review): the same call is repeated after the grow loop; confirm both are needed
+  while (first+n >= nmax) grow(0);  // loop exits once first+n < nmax (presumably grow(0) enlarges nmax)
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+  if(space==Host) {  // host views, host functor, host fence
+    struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
+      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();
+  } else {  // device views, device functor, device fence
+    struct AtomVecFullKokkos_UnpackBorder<LMPDeviceType>
+      f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
+{  // host unpack of n border atoms, 8 doubles each, into atoms first..first+n-1
+  int i,m,last;
+
+  m = 0;  // read cursor into buf
+  last = first + n;
+  if(n > 0)  // nothing is flagged for an empty request
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);  // presumably enlarges the per-atom arrays before index i is written - TODO confirm
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;  // integer fields are read through the union's integer member
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q(i) = buf[m++];
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  if (atom->nextra_border)  // hand the remainder of buf to each listed fix in turn
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);  // the fix's return value advances the read cursor
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
+{  // host unpack of n border atoms with velocities, 11 doubles each, into atoms first..first+n-1
+  int i,m,last;
+
+  m = 0;  // read cursor into buf
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);  // presumably enlarges the per-atom arrays before index i is written - TODO confirm
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);  // NOTE(review): issued on every iteration and passes no velocity flag although h_v is written below; confirm intent
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;  // integer fields are read through the union's integer member
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_q(i) = buf[m++];
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  if (atom->nextra_border)  // hand the remainder of buf to each listed fix in turn
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);  // the fix's return value advances the read cursor
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  // Reads two doubles per atom (charge, molecule ID) for atoms
+  // first..first+n-1 and returns how many doubles were consumed.
+  int nread = 0;
+  for (int k = 0; k < n; k++) {
+    const int iatom = first + k;
+    h_q(iatom) = buf[nread++];
+    h_molecule(iatom) = (tagint) ubuf(buf[nread++]).i;
+  }
+  return nread;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_PackExchangeFunctor {  // packs atom _sendlist(k) into buffer row k, then overwrites that atom with atom _copylist(k) when it is non-negative
+
+  union ubuf {  // lets integer fields be stored in a double-sized buffer slot
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array_randomread _x;  // the *_randomread members below are the views operator() reads from
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_float_1d_randomread _q;
+  typename AT::t_tagint_1d_randomread _molecule;
+  typename AT::t_int_2d_randomread _nspecial;
+  typename AT::t_tagint_2d_randomread _special;
+  typename AT::t_int_1d_randomread _num_bond;
+  typename AT::t_int_2d_randomread _bond_type;
+  typename AT::t_tagint_2d_randomread _bond_atom;
+  typename AT::t_int_1d_randomread _num_angle;
+  typename AT::t_int_2d_randomread _angle_type;
+  typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3;
+  typename AT::t_int_1d_randomread _num_dihedral;
+  typename AT::t_int_2d_randomread _dihedral_type;
+  typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2,
+    _dihedral_atom3,_dihedral_atom4;
+  typename AT::t_int_1d_randomread _num_improper;
+  typename AT::t_int_2d_randomread _improper_type;
+  typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2,
+    _improper_atom3,_improper_atom4;
+  typename AT::t_x_array _xw;  // the *w members are writable views, initialised from the same k_* arrays as their read-only twins
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_float_1d _qw;
+  typename AT::t_tagint_1d _moleculew;
+  typename AT::t_int_2d _nspecialw;
+  typename AT::t_tagint_2d _specialw;
+  typename AT::t_int_1d _num_bondw;
+  typename AT::t_int_2d _bond_typew;
+  typename AT::t_tagint_2d _bond_atomw;
+  typename AT::t_int_1d _num_anglew;
+  typename AT::t_int_2d _angle_typew;
+  typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w;
+  typename AT::t_int_1d _num_dihedralw;
+  typename AT::t_int_2d _dihedral_typew;
+  typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w,
+    _dihedral_atom3w,_dihedral_atom4w;
+  typename AT::t_int_1d _num_improperw;
+  typename AT::t_int_2d _improper_typew;
+  typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w,
+    _improper_atom3w,_improper_atom4w;
+  typename AT::t_xfloat_2d_um _buf;  // output buffer, indexed as (send index, slot); set up by buffer_view in the constructor
+  typename AT::t_int_1d_const _sendlist;  // atom index to pack for each send index
+  typename AT::t_int_1d_const _copylist;  // atom index copied over the packed atom; values <= -1 mean no copy
+  int _nlocal,_dim;  // stored, but not read by operator()
+  X_FLOAT _lo,_hi;  // stored, but not read by operator()
+  size_t elements;  // number of buffer slots per atom, computed in the constructor
+
+  AtomVecFullKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improper(atom->k_num_improper.view<DeviceType>()),
+    _improper_type(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _qw(atom->k_q.view<DeviceType>()),
+    _moleculew(atom->k_molecule.view<DeviceType>()),
+    _nspecialw(atom->k_nspecial.view<DeviceType>()),
+    _specialw(atom->k_special.view<DeviceType>()),
+    _num_bondw(atom->k_num_bond.view<DeviceType>()),
+    _bond_typew(atom->k_bond_type.view<DeviceType>()),
+    _bond_atomw(atom->k_bond_atom.view<DeviceType>()),
+    _num_anglew(atom->k_num_angle.view<DeviceType>()),
+    _angle_typew(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improperw(atom->k_num_improper.view<DeviceType>()),
+    _improper_typew(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi){
+    // Slots per atom.  The 20 fixed slots are: 1 for the row length, 3 x, 3 v,
+    // tag, type, mask, image, q, molecule (6), the four counters num_bond,
+    // num_angle, num_dihedral and num_improper, and 3 nspecial.
+    // Variable part, sized by the per-atom maxima:
+    //   maxspecial special entries, 2 per bond (type, atom),
+    //   4 per angle (type, atom1..3), 5 per dihedral (type, atom1..4),
+    //   5 per improper (type, atom1..4).
+    // operator() fills at most this many slots if counts stay within the maxima.
+    elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;  // whole rows of `elements` slots that fit in buf
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);  // presumably binds _buf as a maxsendlist x elements view over buf - TODO confirm
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    int k;
+    const int i = _sendlist(mysend);  // atom being packed
+    _buf(mysend,0) = elements;  // slot 0 holds the fixed row width, not the number of slots actually filled
+    int m = 1;  // next free slot in this row
+    _buf(mysend,m++) = _x(i,0);
+    _buf(mysend,m++) = _x(i,1);
+    _buf(mysend,m++) = _x(i,2);
+    _buf(mysend,m++) = _v(i,0);
+    _buf(mysend,m++) = _v(i,1);
+    _buf(mysend,m++) = _v(i,2);
+    _buf(mysend,m++) = ubuf(_tag(i)).d;
+    _buf(mysend,m++) = ubuf(_type(i)).d;
+    _buf(mysend,m++) = ubuf(_mask(i)).d;
+    _buf(mysend,m++) = ubuf(_image(i)).d;
+    _buf(mysend,m++) = _q(i);
+    _buf(mysend,m++) = ubuf(_molecule(i)).d;
+    _buf(mysend,m++) = ubuf(_num_bond(i)).d;
+    for (k = 0; k < _num_bond(i); k++) {  // only the bonds this atom actually has are written
+      _buf(mysend,m++) = ubuf(_bond_type(i,k)).d;
+      _buf(mysend,m++) = ubuf(_bond_atom(i,k)).d;
+    }
+    _buf(mysend,m++) = ubuf(_num_angle(i)).d;
+    for (k = 0; k < _num_angle(i); k++) {
+      _buf(mysend,m++) = ubuf(_angle_type(i,k)).d;
+      _buf(mysend,m++) = ubuf(_angle_atom1(i,k)).d;
+      _buf(mysend,m++) = ubuf(_angle_atom2(i,k)).d;
+      _buf(mysend,m++) = ubuf(_angle_atom3(i,k)).d;
+    }
+    _buf(mysend,m++) = ubuf(_num_dihedral(i)).d;
+    for (k = 0; k < _num_dihedral(i); k++) {
+      _buf(mysend,m++) = ubuf(_dihedral_type(i,k)).d;
+      _buf(mysend,m++) = ubuf(_dihedral_atom1(i,k)).d;
+      _buf(mysend,m++) = ubuf(_dihedral_atom2(i,k)).d;
+      _buf(mysend,m++) = ubuf(_dihedral_atom3(i,k)).d;
+      _buf(mysend,m++) = ubuf(_dihedral_atom4(i,k)).d;
+    }
+    _buf(mysend,m++) = ubuf(_num_improper(i)).d;
+    for (k = 0; k < _num_improper(i); k++) {
+      _buf(mysend,m++) = ubuf(_improper_type(i,k)).d;
+      _buf(mysend,m++) = ubuf(_improper_atom1(i,k)).d;
+      _buf(mysend,m++) = ubuf(_improper_atom2(i,k)).d;
+      _buf(mysend,m++) = ubuf(_improper_atom3(i,k)).d;
+      _buf(mysend,m++) = ubuf(_improper_atom4(i,k)).d;
+    }
+
+    _buf(mysend,m++) = ubuf(_nspecial(i,0)).d;
+    _buf(mysend,m++) = ubuf(_nspecial(i,1)).d;
+    _buf(mysend,m++) = ubuf(_nspecial(i,2)).d;
+    for (k = 0; k < _nspecial(i,2); k++)  // _nspecial(i,2) is used as the number of special entries to write
+      _buf(mysend,m++) = ubuf(_special(i,k)).d;
+
+    const int j = _copylist(mysend);  // atom whose data replaces atom i
+
+    if(j>-1) {  // negative j: slot i is left as it is
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw(i) = _tag(j);
+    _typew(i) = _type(j);
+    _maskw(i) = _mask(j);
+    _imagew(i) = _image(j);
+    _qw(i) = _q(j);
+    _moleculew(i) = _molecule(j);
+    _num_bondw(i) = _num_bond(j);
+    for (k = 0; k < _num_bond(j); k++) {
+      _bond_typew(i,k) = _bond_type(j,k);
+      _bond_atomw(i,k) = _bond_atom(j,k);
+    }
+    _num_anglew(i) = _num_angle(j);
+    for (k = 0; k < _num_angle(j); k++) {
+      _angle_typew(i,k) = _angle_type(j,k);
+      _angle_atom1w(i,k) = _angle_atom1(j,k);
+      _angle_atom2w(i,k) = _angle_atom2(j,k);
+      _angle_atom3w(i,k) = _angle_atom3(j,k);
+    }
+    _num_dihedralw(i) = _num_dihedral(j);
+    for (k = 0; k < _num_dihedral(j); k++) {
+      _dihedral_typew(i,k) = _dihedral_type(j,k);
+      _dihedral_atom1w(i,k) = _dihedral_atom1(j,k);
+      _dihedral_atom2w(i,k) = _dihedral_atom2(j,k);
+      _dihedral_atom3w(i,k) = _dihedral_atom3(j,k);
+      _dihedral_atom4w(i,k) = _dihedral_atom4(j,k);
+    }
+    _num_improperw(i) = _num_improper(j);
+    for (k = 0; k < _num_improper(j); k++) {
+      _improper_typew(i,k) = _improper_type(j,k);
+      _improper_atom1w(i,k) = _improper_atom1(j,k);
+      _improper_atom2w(i,k) = _improper_atom2(j,k);
+      _improper_atom3w(i,k) = _improper_atom3(j,k);
+      _improper_atom4w(i,k) = _improper_atom4(j,k);
+    }
+    _nspecialw(i,0) = _nspecial(j,0);
+    _nspecialw(i,1) = _nspecial(j,1);
+    _nspecialw(i,2) = _nspecial(j,2);
+    for (k = 0; k < _nspecial(j,2); k++)
+      _specialw(i,k) = _special(j,k);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* pack nsend atoms into k_buf on the requested execution space
+   grows k_buf first if it cannot hold nsend fixed-width records
+   returns the number of buffer values occupied (nsend * record width) */
+
+int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                                 DAT::tdual_int_1d k_sendlist,
+                                                 DAT::tdual_int_1d k_copylist,
+                                                 ExecutionSpace space,int dim,X_FLOAT lo,
+                                                 X_FLOAT hi )
+{
+  // width of one atom record in the buffer
+  const int elements = 20 + atom->maxspecial
+    + 2*atom->bond_per_atom + 4*atom->angle_per_atom
+    + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+
+  // number of whole records the buffer can currently hold
+  const int capacity = (int) (k_buf.view<LMPHostType>().dimension_0()*
+                              k_buf.view<LMPHostType>().dimension_1())/elements;
+  if (capacity < nsend) {
+    int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+
+  if (space != Host) {
+    AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType>
+      pack(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,pack);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
+      pack(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,pack);
+    LMPHostType::fence();
+  }
+
+  return nsend*elements;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack all data of host atom i into buf for exchange with another proc
+   buf[0] receives the total number of values written (including itself)
+   integer quantities are stored bit-wise via ubuf
+   returns the number of values written */
+
+int AtomVecFullKokkos::pack_exchange(int i, double *buf)
+{
+  // slot 0 is filled in last, so the payload starts at slot 1
+  int n = 1;
+
+  for (int d = 0; d < 3; d++) buf[n++] = h_x(i,d);
+  for (int d = 0; d < 3; d++) buf[n++] = h_v(i,d);
+
+  buf[n++] = ubuf(h_tag(i)).d;
+  buf[n++] = ubuf(h_type(i)).d;
+  buf[n++] = ubuf(h_mask(i)).d;
+  buf[n++] = ubuf(h_image(i)).d;
+  buf[n++] = h_q(i);
+  buf[n++] = ubuf(h_molecule(i)).d;
+
+  // bonds: count followed by (type, partner) pairs
+  buf[n++] = ubuf(h_num_bond(i)).d;
+  for (int b = 0; b < h_num_bond(i); b++) {
+    buf[n++] = ubuf(h_bond_type(i,b)).d;
+    buf[n++] = ubuf(h_bond_atom(i,b)).d;
+  }
+
+  // angles: count followed by (type, atom1..atom3)
+  buf[n++] = ubuf(h_num_angle(i)).d;
+  for (int a = 0; a < h_num_angle(i); a++) {
+    buf[n++] = ubuf(h_angle_type(i,a)).d;
+    buf[n++] = ubuf(h_angle_atom1(i,a)).d;
+    buf[n++] = ubuf(h_angle_atom2(i,a)).d;
+    buf[n++] = ubuf(h_angle_atom3(i,a)).d;
+  }
+
+  // dihedrals: count followed by (type, atom1..atom4)
+  buf[n++] = ubuf(h_num_dihedral(i)).d;
+  for (int d = 0; d < h_num_dihedral(i); d++) {
+    buf[n++] = ubuf(h_dihedral_type(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom1(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom2(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom3(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom4(i,d)).d;
+  }
+
+  // impropers: count followed by (type, atom1..atom4)
+  buf[n++] = ubuf(h_num_improper(i)).d;
+  for (int p = 0; p < h_num_improper(i); p++) {
+    buf[n++] = ubuf(h_improper_type(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom1(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom2(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom3(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom4(i,p)).d;
+  }
+
+  // special neighbors: three counts, then nspecial(i,2) entries
+  for (int s = 0; s < 3; s++) buf[n++] = ubuf(h_nspecial(i,s)).d;
+  for (int s = 0; s < h_nspecial(i,2); s++)
+    buf[n++] = ubuf(h_special(i,s)).d;
+
+  // let every fix that stores per-atom data append its values
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    n += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[n]);
+
+  buf[0] = n;
+  return n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* functor that unpacks received atom records from the exchange buffer
+   into the per-atom views of the given DeviceType
+   each work item handles one record (row) of the buffer
+   a record is only stored if its coordinate along _dim is in [_lo,_hi) */
+
+template<class DeviceType>
+struct AtomVecFullKokkos_UnpackExchangeFunctor {
+
+  // local helper to reinterpret a buffer slot (double) as a 64-bit integer
+  // NOTE(review): constructors carry no KOKKOS_INLINE_FUNCTION annotation;
+  //   confirm this is acceptable when operator() runs on a device backend
+  union ubuf {
+    double d;
+    int64_t i;
+    ubuf(double arg) : d(arg) {}
+    ubuf(int64_t arg) : i(arg) {}
+    ubuf(int arg) : i(arg) {}
+  };
+
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  // destination per-atom views, taken from the AtomKokkos dual views
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_float_1d _q;
+  typename AT::t_tagint_1d _molecule;
+  typename AT::t_int_2d _nspecial;
+  typename AT::t_tagint_2d _special;
+  typename AT::t_int_1d _num_bond;
+  typename AT::t_int_2d _bond_type;
+  typename AT::t_tagint_2d _bond_atom;
+  typename AT::t_int_1d _num_angle;
+  typename AT::t_int_2d _angle_type;
+  typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3;
+  typename AT::t_int_1d _num_dihedral;
+  typename AT::t_int_2d _dihedral_type;
+  typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2,
+    _dihedral_atom3,_dihedral_atom4;
+  typename AT::t_int_1d _num_improper;
+  typename AT::t_int_2d _improper_type;
+  typename AT::t_tagint_2d _improper_atom1,_improper_atom2,
+    _improper_atom3,_improper_atom4;
+
+  // receive buffer viewed as (record, slot); set up in the constructor body
+  typename AT::t_xfloat_2d_um _buf;
+  // single-element counter: _nlocal(0) is the next free local atom index
+  typename AT::t_int_1d _nlocal;
+  // dimension and bounds used to decide whether a record is accepted
+  int _dim;
+  X_FLOAT _lo,_hi;
+  // width of one atom record in the buffer
+  size_t elements;
+
+  /* atom   = source of the per-atom dual views and per-atom topology limits
+     buf    = dual view holding the received records
+     nlocal = dual view whose element 0 counts owned atoms
+     dim,lo,hi = accept a record only if lo <= x[dim] < hi */
+  AtomVecFullKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _q(atom->k_q.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improper(atom->k_num_improper.view<DeviceType>()),
+    _improper_type(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi){
+
+    // record width; same formula as used by the exchange driver routines
+    elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+    // number of whole records that fit into the buffer allocation
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;
+    // presumably re-views buf as maxsendlist x elements; buffer_view is
+    // defined elsewhere - TODO confirm
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  // unpack record myrecv if its coordinate along _dim lies in [_lo,_hi)
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    // x occupies slots 1-3 of a record, hence the +1 offset
+    X_FLOAT x = _buf(myrecv,_dim+1);
+    if (x >= _lo && x < _hi) {
+      // atomically claim the next free local index for this atom
+      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+      // slot 0 is skipped; values are read in the order they were packed
+      int m = 1;
+      _x(i,0) = _buf(myrecv,m++);
+      _x(i,1) = _buf(myrecv,m++);
+      _x(i,2) = _buf(myrecv,m++);
+      _v(i,0) = _buf(myrecv,m++);
+      _v(i,1) = _buf(myrecv,m++);
+      _v(i,2) = _buf(myrecv,m++);
+      _tag(i) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      _type(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      _mask(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      _image(i) = (imageint) ubuf(_buf(myrecv,m++)).i;
+      _q(i) = _buf(myrecv,m++);
+      _molecule(i) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      // each topology section: count first, then that many entries
+      _num_bond(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      int k;
+      for (k = 0; k < _num_bond(i); k++) {
+        _bond_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i;
+        _bond_atom(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      }
+      _num_angle(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      for (k = 0; k < _num_angle(i); k++) {
+        _angle_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i;
+        _angle_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _angle_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _angle_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      }
+      _num_dihedral(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      for (k = 0; k < _num_dihedral(i); k++) {
+        _dihedral_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i;
+        _dihedral_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _dihedral_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _dihedral_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _dihedral_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      }
+      _num_improper(i) = (int) ubuf(_buf(myrecv,m++)).i;
+      for (k = 0; k < _num_improper(i); k++) {
+        _improper_type(i,k) = (int) ubuf(_buf(myrecv,m++)).i;
+        _improper_atom1(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _improper_atom2(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _improper_atom3(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+        _improper_atom4(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+      }
+      // three special-neighbor counts, then nspecial(i,2) entries
+      _nspecial(i,0) = (int) ubuf(_buf(myrecv,m++)).i;
+      _nspecial(i,1) = (int) ubuf(_buf(myrecv,m++)).i;
+      _nspecial(i,2) = (int) ubuf(_buf(myrecv,m++)).i;
+      for (k = 0; k < _nspecial(i,2); k++)
+        _special(i,k) = (tagint) ubuf(_buf(myrecv,m++)).i;
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack nrecv buffer values (fixed-width atom records) from k_buf
+   on the requested execution space, appending accepted atoms after nlocal
+   returns the new number of local atoms as counted in k_count */
+
+int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                              int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                              ExecutionSpace space) {
+  // width of one atom record in the buffer
+  const size_t elements = 20 + atom->maxspecial
+    + 2*atom->bond_per_atom + 4*atom->angle_per_atom
+    + 5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+
+  // number of records contained in the received data
+  const size_t nrecords = nrecv/elements;
+
+  // the functor bumps this counter once per accepted atom
+  k_count.h_view(0) = nlocal;
+
+  if (space != Host) {
+    // push the starting count to the device, run, then pull the result back
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+    AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecords,unpack);
+    LMPDeviceType::fence();
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  } else {
+    AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
+      unpack(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecords,unpack);
+    LMPHostType::fence();
+  }
+
+  return k_count.h_view(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack one atom from buf (layout written by pack_exchange) and append
+   it as a new local atom on the host; grows the arrays if they are full
+   returns the number of buffer values consumed */
+
+int AtomVecFullKokkos::unpack_exchange(double *buf)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+
+  // flag every per-atom array written below as modified on the host
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
+           ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
+
+  // slot 0 holds the record length, the payload starts at slot 1
+  int n = 1;
+
+  for (int d = 0; d < 3; d++) h_x(inew,d) = buf[n++];
+  for (int d = 0; d < 3; d++) h_v(inew,d) = buf[n++];
+
+  h_tag(inew) = (tagint) ubuf(buf[n++]).i;
+  h_type(inew) = (int) ubuf(buf[n++]).i;
+  h_mask(inew) = (int) ubuf(buf[n++]).i;
+  h_image(inew) = (imageint) ubuf(buf[n++]).i;
+  h_q(inew) = buf[n++];
+  h_molecule(inew) = (tagint) ubuf(buf[n++]).i;
+
+  // bonds
+  h_num_bond(inew) = (int) ubuf(buf[n++]).i;
+  for (int b = 0; b < h_num_bond(inew); b++) {
+    h_bond_type(inew,b) = (int) ubuf(buf[n++]).i;
+    h_bond_atom(inew,b) = (tagint) ubuf(buf[n++]).i;
+  }
+
+  // angles
+  h_num_angle(inew) = (int) ubuf(buf[n++]).i;
+  for (int a = 0; a < h_num_angle(inew); a++) {
+    h_angle_type(inew,a) = (int) ubuf(buf[n++]).i;
+    h_angle_atom1(inew,a) = (tagint) ubuf(buf[n++]).i;
+    h_angle_atom2(inew,a) = (tagint) ubuf(buf[n++]).i;
+    h_angle_atom3(inew,a) = (tagint) ubuf(buf[n++]).i;
+  }
+
+  // dihedrals
+  h_num_dihedral(inew) = (int) ubuf(buf[n++]).i;
+  for (int d = 0; d < h_num_dihedral(inew); d++) {
+    h_dihedral_type(inew,d) = (int) ubuf(buf[n++]).i;
+    h_dihedral_atom1(inew,d) = (tagint) ubuf(buf[n++]).i;
+    h_dihedral_atom2(inew,d) = (tagint) ubuf(buf[n++]).i;
+    h_dihedral_atom3(inew,d) = (tagint) ubuf(buf[n++]).i;
+    h_dihedral_atom4(inew,d) = (tagint) ubuf(buf[n++]).i;
+  }
+
+  // impropers
+  h_num_improper(inew) = (int) ubuf(buf[n++]).i;
+  for (int p = 0; p < h_num_improper(inew); p++) {
+    h_improper_type(inew,p) = (int) ubuf(buf[n++]).i;
+    h_improper_atom1(inew,p) = (tagint) ubuf(buf[n++]).i;
+    h_improper_atom2(inew,p) = (tagint) ubuf(buf[n++]).i;
+    h_improper_atom3(inew,p) = (tagint) ubuf(buf[n++]).i;
+    h_improper_atom4(inew,p) = (tagint) ubuf(buf[n++]).i;
+  }
+
+  // special neighbors: three counts, then nspecial(inew,2) entries
+  for (int s = 0; s < 3; s++) h_nspecial(inew,s) = (int) ubuf(buf[n++]).i;
+  for (int s = 0; s < h_nspecial(inew,2); s++)
+    h_special(inew,s) = (tagint) ubuf(buf[n++]).i;
+
+  // let every fix that stores per-atom data read its values
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    n += modify->fix[atom->extra_grow[iextra]]->
+      unpack_exchange(inew,&buf[n]);
+
+  atom->nlocal++;
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::size_restart()
+{
+  const int nlocal = atom->nlocal;
+
+  // 17 fixed values per atom plus the variable-length topology lists
+  int total = 0;
+  for (int i = 0; i < nlocal; i++) {
+    total += 17;
+    total += 2*num_bond[i];
+    total += 4*num_angle[i];
+    total += 5*num_dihedral[i];
+    total += 5*num_improper[i];
+  }
+
+  // add whatever each restart-storing fix needs per atom
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++) {
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+  }
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_restart(int i, double *buf)
+{
+  // slot 0 is written last with the total length; xyz must come first
+  int n = 1;
+  for (int d = 0; d < 3; d++) buf[n++] = h_x(i,d);
+
+  buf[n++] = ubuf(h_tag(i)).d;
+  buf[n++] = ubuf(h_type(i)).d;
+  buf[n++] = ubuf(h_mask(i)).d;
+  buf[n++] = ubuf(h_image(i)).d;
+
+  for (int d = 0; d < 3; d++) buf[n++] = h_v(i,d);
+
+  buf[n++] = h_q(i);
+  buf[n++] = ubuf(h_molecule(i)).d;
+
+  // for each topology list the type is written as its absolute value
+
+  buf[n++] = ubuf(h_num_bond(i)).d;
+  for (int b = 0; b < h_num_bond(i); b++) {
+    buf[n++] = ubuf(MAX(h_bond_type(i,b),-h_bond_type(i,b))).d;
+    buf[n++] = ubuf(h_bond_atom(i,b)).d;
+  }
+
+  buf[n++] = ubuf(h_num_angle(i)).d;
+  for (int a = 0; a < h_num_angle(i); a++) {
+    buf[n++] = ubuf(MAX(h_angle_type(i,a),-h_angle_type(i,a))).d;
+    buf[n++] = ubuf(h_angle_atom1(i,a)).d;
+    buf[n++] = ubuf(h_angle_atom2(i,a)).d;
+    buf[n++] = ubuf(h_angle_atom3(i,a)).d;
+  }
+
+  buf[n++] = ubuf(h_num_dihedral(i)).d;
+  for (int d = 0; d < h_num_dihedral(i); d++) {
+    buf[n++] = ubuf(MAX(h_dihedral_type(i,d),-h_dihedral_type(i,d))).d;
+    buf[n++] = ubuf(h_dihedral_atom1(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom2(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom3(i,d)).d;
+    buf[n++] = ubuf(h_dihedral_atom4(i,d)).d;
+  }
+
+  buf[n++] = ubuf(h_num_improper(i)).d;
+  for (int p = 0; p < h_num_improper(i); p++) {
+    buf[n++] = ubuf(MAX(h_improper_type(i,p),-h_improper_type(i,p))).d;
+    buf[n++] = ubuf(h_improper_atom1(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom2(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom3(i,p)).d;
+    buf[n++] = ubuf(h_improper_atom4(i,p)).d;
+  }
+
+  // append the per-atom restart data of every fix that stores some
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    n += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[n]);
+
+  buf[0] = n;
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::unpack_restart(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+  // NOTE(review): unlike unpack_exchange(), no modified(Host,...) call is
+  //   made before the host views are written - confirm callers handle this
+
+  // buf[0] holds the record length; values follow in pack_restart() order
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_q(nlocal) = buf[m++];
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (int k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (int k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i;
+  for (int k = 0; k < h_num_dihedral(nlocal); k++) {
+    h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_improper(nlocal) = (int) ubuf(buf[m++]).i;
+  for (int k = 0; k < h_num_improper(nlocal); k++) {
+    h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // special neighbor lists are not part of the restart record
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    // pack_restart() stores the total record length as a plain double in
+    // buf[0]; everything beyond the values consumed so far is fix data.
+    // (previously this read ubuf(buf[m++]).i - m, which took the wrong slot
+    //  and modified and read m within the same expression)
+    const int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::create_atom(int itype, double *coord)
+{
+  const int inew = atom->nlocal;
+
+  // host data is flagged as modified both before a possible grow and after
+  if (inew == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  // identity and position
+  tag[inew] = 0;
+  type[inew] = itype;
+  for (int d = 0; d < 3; d++) h_x(inew,d) = coord[d];
+
+  // default group mask, image flags and zero velocity
+  h_mask(inew) = 1;
+  h_image(inew) = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+  for (int d = 0; d < 3; d++) h_v(inew,d) = 0.0;
+
+  // no charge, molecule or topology yet
+  h_q(inew) = 0.0;
+  h_molecule(inew) = 0;
+  h_num_bond(inew) = 0;
+  h_num_angle(inew) = 0;
+  h_num_dihedral(inew) = 0;
+  h_num_improper(inew) = 0;
+  h_nspecial(inew,2) = 0;
+  h_nspecial(inew,1) = 0;
+  h_nspecial(inew,0) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::data_atom(double *coord, imageint imagetmp,
+                                       char **values)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+
+  // values[] = atom ID, molecule ID, atom type, charge
+  const int id = atoi(values[0]);
+  h_tag(inew) = id;
+  if (id <= 0)
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  const int molid = atoi(values[1]);
+  h_molecule(inew) = molid;
+  if (molid <= 0)
+    error->one(FLERR,"Invalid molecule ID in Atoms section of data file");
+
+  const int itype = atoi(values[2]);
+  h_type(inew) = itype;
+  if (itype <= 0 || itype > atom->ntypes)
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  h_q(inew) = atof(values[3]);
+
+  for (int d = 0; d < 3; d++) h_x(inew,d) = coord[d];
+  h_image(inew) = imagetmp;
+
+  // defaults for everything the data line does not provide
+  h_mask(inew) = 1;
+  for (int d = 0; d < 3; d++) h_v(inew,d) = 0.0;
+  h_num_bond(inew) = 0;
+  h_num_angle(inew) = 0;
+  h_num_dihedral(inew) = 0;
+  h_num_improper(inew) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  // this sub-style contributes two columns: molecule ID and charge
+  const int nconsumed = 2;
+
+  h_molecule(nlocal) = atoi(values[0]);
+  h_q(nlocal) = atof(values[1]);
+
+  // the new atom starts without any topology
+  h_num_improper(nlocal) = 0;
+  h_num_dihedral(nlocal) = 0;
+  h_num_angle(nlocal) = 0;
+  h_num_bond(nlocal) = 0;
+
+  return nconsumed;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::pack_data(double **buf)
+{
+  const int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    double *row = buf[i];
+
+    // columns 0-6: ID, molecule, type, charge, x, y, z (as plain doubles)
+    row[0] = h_tag(i);
+    row[1] = h_molecule(i);
+    row[2] = h_type(i);
+    row[3] = h_q(i);
+    row[4] = h_x(i,0);
+    row[5] = h_x(i,1);
+    row[6] = h_x(i,2);
+
+    // columns 7-9: the three image flags unpacked from the image word
+    row[7] = (h_image[i] & IMGMASK) - IMGMAX;
+    row[8] = ((h_image[i] >> IMGBITS) & IMGMASK) - IMGMAX;
+    row[9] = (h_image[i] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::pack_data_hybrid(int i, double *buf)
+{
+  // write_data_hybrid() decodes buf[0] with ubuf(...).i, so the molecule ID
+  // must be stored bit-wise via ubuf rather than converted to a double value
+  buf[0] = ubuf(h_molecule(i)).d;
+  buf[1] = h_q(i);
+
+  // number of values written for this sub-style
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecFullKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  // one output line per atom: ID, molecule, type, q, x, y, z, 3 image flags
+  for (int i = 0; i < n; i++) {
+    const double *row = buf[i];
+    fprintf(fp,"%d %d %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (int) row[0],(int) row[1],(int) row[2],
+            row[3],row[4],row[5],row[6],
+            (int) row[7],(int) row[8],(int) row[9]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecFullKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  // buf[0] is decoded bit-wise as an integer molecule ID, buf[1] is the charge
+  const tagint molecule_id = (tagint) ubuf(buf[0]).i;
+  const double charge = buf[1];
+  fprintf(fp," " TAGINT_FORMAT " %-1.16e",molecule_id,charge);
+
+  // number of buffer values consumed
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecFullKokkos::memory_usage()
+{
+  // second dimension of the per-atom topology arrays
+  const int nbond = atom->bond_per_atom;
+  const int nangle = atom->angle_per_atom;
+  const int ndihedral = atom->dihedral_per_atom;
+  const int nimproper = atom->improper_per_atom;
+
+  bigint total = 0;
+
+  // basic per-atom arrays; f is sized per thread
+  if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) total += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) total += memory->usage(image,nmax);
+  if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) total += memory->usage(f,nmax*commKK->nthreads,3);
+
+  // charge, molecule and special-neighbor arrays
+  if (atom->memcheck("q")) total += memory->usage(q,nmax);
+  if (atom->memcheck("molecule")) total += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) total += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special")) {
+    total += memory->usage(special,nmax,atom->maxspecial);
+  }
+
+  // bonds
+  if (atom->memcheck("num_bond")) total += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type")) {
+    total += memory->usage(bond_type,nmax,nbond);
+  }
+  if (atom->memcheck("bond_atom")) {
+    total += memory->usage(bond_atom,nmax,nbond);
+  }
+
+  // angles
+  if (atom->memcheck("num_angle")) total += memory->usage(num_angle,nmax);
+  if (atom->memcheck("angle_type")) {
+    total += memory->usage(angle_type,nmax,nangle);
+  }
+  if (atom->memcheck("angle_atom1")) {
+    total += memory->usage(angle_atom1,nmax,nangle);
+  }
+  if (atom->memcheck("angle_atom2")) {
+    total += memory->usage(angle_atom2,nmax,nangle);
+  }
+  if (atom->memcheck("angle_atom3")) {
+    total += memory->usage(angle_atom3,nmax,nangle);
+  }
+
+  // dihedrals
+  if (atom->memcheck("num_dihedral")) total += memory->usage(num_dihedral,nmax);
+  if (atom->memcheck("dihedral_type")) {
+    total += memory->usage(dihedral_type,nmax,ndihedral);
+  }
+  if (atom->memcheck("dihedral_atom1")) {
+    total += memory->usage(dihedral_atom1,nmax,ndihedral);
+  }
+  if (atom->memcheck("dihedral_atom2")) {
+    total += memory->usage(dihedral_atom2,nmax,ndihedral);
+  }
+  if (atom->memcheck("dihedral_atom3")) {
+    total += memory->usage(dihedral_atom3,nmax,ndihedral);
+  }
+  if (atom->memcheck("dihedral_atom4")) {
+    total += memory->usage(dihedral_atom4,nmax,ndihedral);
+  }
+
+  // impropers
+  if (atom->memcheck("num_improper")) total += memory->usage(num_improper,nmax);
+  if (atom->memcheck("improper_type")) {
+    total += memory->usage(improper_type,nmax,nimproper);
+  }
+  if (atom->memcheck("improper_atom1")) {
+    total += memory->usage(improper_atom1,nmax,nimproper);
+  }
+  if (atom->memcheck("improper_atom2")) {
+    total += memory->usage(improper_atom2,nmax,nimproper);
+  }
+  if (atom->memcheck("improper_atom3")) {
+    total += memory->usage(improper_atom3,nmax,nimproper);
+  }
+  if (atom->memcheck("improper_atom4")) {
+    total += memory->usage(improper_atom4,nmax,nimproper);
+  }
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* sync the per-atom dual views selected by mask to the given space
+   space = Device syncs to LMPDeviceType, anything else to LMPHostType
+   mask  = bit-wise OR of the *_MASK flags naming the data to sync
+   every flag is tested with bit-wise &, so only requested views are synced
+   (the Q/MOLECULE/SPECIAL/BOND/ANGLE/DIHEDRAL/IMPROPER tests used logical &&
+    before, which synced those views for any non-zero mask) */
+
+void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPDeviceType>();
+      atomKK->k_dihedral_type.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.sync<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPDeviceType>();
+      atomKK->k_improper_type.sync<LMPDeviceType>();
+      atomKK->k_improper_atom1.sync<LMPDeviceType>();
+      atomKK->k_improper_atom2.sync<LMPDeviceType>();
+      atomKK->k_improper_atom3.sync<LMPDeviceType>();
+      // was k_improper_atom3 a second time, leaving atom4 unsynced
+      atomKK->k_improper_atom4.sync<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPHostType>();
+      atomKK->k_dihedral_type.sync<LMPHostType>();
+      atomKK->k_dihedral_atom1.sync<LMPHostType>();
+      atomKK->k_dihedral_atom2.sync<LMPHostType>();
+      atomKK->k_dihedral_atom3.sync<LMPHostType>();
+      atomKK->k_dihedral_atom4.sync<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPHostType>();
+      atomKK->k_improper_type.sync<LMPHostType>();
+      atomKK->k_improper_atom1.sync<LMPHostType>();
+      atomKK->k_improper_atom2.sync<LMPHostType>();
+      atomKK->k_improper_atom3.sync<LMPHostType>();
+      // was k_improper_atom3 a second time, leaving atom4 unsynced
+      atomKK->k_improper_atom4.sync<LMPHostType>();
+    }
+  }
+}
+
+/* mark the dual views selected by mask as modified in the given space */
+
+void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space == Device) {  // every field is tested with bitwise &, never logical &&
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPDeviceType>();
+      atomKK->k_dihedral_type.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.modify<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPDeviceType>();
+      atomKK->k_improper_type.modify<LMPDeviceType>();
+      atomKK->k_improper_atom1.modify<LMPDeviceType>();
+      atomKK->k_improper_atom2.modify<LMPDeviceType>();
+      atomKK->k_improper_atom3.modify<LMPDeviceType>();
+      atomKK->k_improper_atom4.modify<LMPDeviceType>();  // was atom3 twice; atom4 was never marked
+    }
+  } else {  // any space other than Device is handled with LMPHostType
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPHostType>();
+      atomKK->k_dihedral_type.modify<LMPHostType>();
+      atomKK->k_dihedral_atom1.modify<LMPHostType>();
+      atomKK->k_dihedral_atom2.modify<LMPHostType>();
+      atomKK->k_dihedral_atom3.modify<LMPHostType>();
+      atomKK->k_dihedral_atom4.modify<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPHostType>();
+      atomKK->k_improper_type.modify<LMPHostType>();
+      atomKK->k_improper_atom1.modify<LMPHostType>();
+      atomKK->k_improper_atom2.modify<LMPHostType>();
+      atomKK->k_improper_atom3.modify<LMPHostType>();
+      atomKK->k_improper_atom4.modify<LMPHostType>();  // was atom3 twice; atom4 was never marked
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..43290bb2198a863d0543e45418431b9c7b3e06b4
--- /dev/null
+++ b/src/KOKKOS/atom_vec_full_kokkos.h
@@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(full/kk,AtomVecFullKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_FULL_KOKKOS_H
+#define LMP_ATOM_VEC_FULL_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecFullKokkos : public AtomVecKokkos {  // registered above as atom style full/kk
+ public:
+  AtomVecFullKokkos(class LAMMPS *);
+  virtual ~AtomVecFullKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, tagint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+  // additions beyond the double*-buffer API above; lists/buffers are DAT::tdual_* types
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+  // sync/modified act on the atomKK dual views selected by mask, for the given space
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  // plain pointers to the per-atom arrays (presumably refreshed by grow_reset - confirm)
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+
+  double *q;
+
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+
+  int *num_angle;
+  int **angle_type;
+  tagint **angle_atom1,**angle_atom2,**angle_atom3;
+
+  int *num_dihedral;
+  int **dihedral_type;
+  tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
+  int *num_improper;
+  int **improper_type;
+  tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
+  // view members for the same fields: d_* use DAT:: types, h_* use HAT:: types
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_float_1d d_q;
+  HAT::t_float_1d h_q;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  DAT::t_int_1d d_num_angle;
+  DAT::t_int_2d d_angle_type;
+  DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
+
+  HAT::t_int_1d h_num_angle;
+  HAT::t_int_2d h_angle_type;
+  HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
+
+  DAT::t_int_1d d_num_dihedral;
+  DAT::t_int_2d d_dihedral_type;
+  DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
+    d_dihedral_atom3,d_dihedral_atom4;
+  DAT::t_int_1d d_num_improper;
+  DAT::t_int_2d d_improper_type;
+  DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
+    d_improper_atom3,d_improper_atom4;
+
+  HAT::t_int_1d h_num_dihedral;
+  HAT::t_int_2d h_dihedral_type;
+  HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
+    h_dihedral_atom3,h_dihedral_atom4;
+  HAT::t_int_1d h_num_improper;
+  HAT::t_int_2d h_improper_type;
+  HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
+    h_improper_atom3,h_improper_atom4;
+  // NOTE(review): AtomVecMolecularKokkos builds its k_count as DAT::tdual_int_1d; confirm HAT:: names the same type
+  HAT::tdual_int_1d k_count;
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index ac651b0b5a5a7efc662ea875a3dba54d46e3d9a3..e555f587e5f0205bca1e985254a2cb64924922d8 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -24,43 +24,43 @@ class AtomVecKokkos : public AtomVec {
   AtomVecKokkos(class LAMMPS *);
   virtual ~AtomVecKokkos() {}
 
-  virtual void sync(ExecutionSpace space, unsigned int mask) {};
-  virtual void modified(ExecutionSpace space, unsigned int mask) {};
+  virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
+  virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
 
   virtual int 
     pack_comm_self(const int &n, const DAT::tdual_int_2d &list, 
                    const int & iswap, const int nfirst, 
-                   const int &pbc_flag, const int pbc[])
-  {return 0;}
+                   const int &pbc_flag, const int pbc[]) = 0;
+  //{return 0;}
   virtual int 
     pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, 
                      const int & iswap, const DAT::tdual_xfloat_2d &buf,
-                     const int &pbc_flag, const int pbc[])
-  {return 0;}
+                     const int &pbc_flag, const int pbc[]) = 0;
+  //{return 0;}
   virtual void 
     unpack_comm_kokkos(const int &n, const int &nfirst, 
-                       const DAT::tdual_xfloat_2d &buf) {};
+                       const DAT::tdual_xfloat_2d &buf) = 0;
   virtual int 
     pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, 
                        DAT::tdual_xfloat_2d buf,int iswap,
-                       int pbc_flag, int *pbc, ExecutionSpace space)
-  {return 0;};
+                       int pbc_flag, int *pbc, ExecutionSpace space) = 0;
+  //{return 0;};
   virtual void 
     unpack_border_kokkos(const int &n, const int &nfirst, 
                          const DAT::tdual_xfloat_2d &buf, 
-                         ExecutionSpace space) {};
+                         ExecutionSpace space) = 0;
 
   virtual int 
     pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, 
                          DAT::tdual_int_1d k_sendlist,
                          DAT::tdual_int_1d k_copylist,
-                         ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) 
-  {return 0;};
+                         ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
+  //{return 0;};
   virtual int 
     unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
                            int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
-                           ExecutionSpace space)
-  {return 0;};
+                           ExecutionSpace space) = 0;
+  //{return 0;};
 
  protected:
   class AtomKokkos *atomKK;
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b75023cd37a0fd174462c836c9b0144e8d5993a9
--- /dev/null
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
@@ -0,0 +1,2236 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_molecular_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{  // sets the style flags and sizes, then caches Kokkos-typed atom/comm pointers
+  molecular = 1;
+  bonds_allow = angles_allow = dihedrals_allow = impropers_allow = 1;  // all four flagged as allowed
+  mass_type = 1;
+  // communication / data-file size settings (consumed by code outside this block)
+  comm_x_only = comm_f_only = 1;
+  size_forward = 3;
+  size_reverse = 3;
+  size_border = 7;
+  size_velocity = 3;
+  size_data_atom = 6;
+  size_data_vel = 4;
+  xcol_data = 4;
+
+  atom->molecule_flag = 1;
+
+  k_count = DAT::tdual_int_1d("atom::k_count",1);  // labelled, extent 1
+  atomKK = (AtomKokkos *) atom;  // atom and comm re-typed as their Kokkos-specific classes
+  commKK = (CommKokkos *) comm;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;  // DELTA is the file-level #define (10000)
+  else nmax = n;
+  atomKK->nmax = nmax;  // stored before the range check below
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+  // sync everything to Device and flag Device as modified before resizing
+  sync(Device,ALL_MASK);
+  modified(Device,ALL_MASK);
+  // grow_kokkos receives both the dual view and the raw pointer of each field
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+
+  memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule");
+  memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial");
+  memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,
+		      "atom:special");
+  memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond");
+  memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,
+		      "atom:bond_type");
+  memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,
+		      "atom:bond_atom");
+
+  memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle");
+  memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom,
+		      "atom:angle_type");
+  memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom1");
+  memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom2");
+  memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom,
+		      "atom:angle_atom3");
+
+  memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral");
+  memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_type");
+  memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom1");
+  memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom2");
+  memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom3");
+  memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax,
+                      atomKK->dihedral_per_atom,"atom:dihedral_atom4");
+
+  memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper");
+  memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax,
+                      atomKK->improper_per_atom,"atom:improper_type");
+  memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom1");
+  memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom2");
+  memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom3");
+  memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax,
+                      atomKK->improper_per_atom,"atom:improper_atom4");
+  // re-read the cached pointers/views, then sync all fields to Host
+  grow_reset();
+  sync(Host,ALL_MASK);
+  // fixes listed in atom->extra_grow are asked to grow their arrays to nmax too
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::grow_reset()
+{
+  // Re-read every cached handle from atomKK, grouped by kind of handle
+  // rather than by field.  First the plain array pointers.
+  tag = atomKK->tag;
+  type = atomKK->type;
+  mask = atomKK->mask;
+  image = atomKK->image;
+  x = atomKK->x;
+  v = atomKK->v;
+  f = atomKK->f;
+  molecule = atomKK->molecule;
+  nspecial = atomKK->nspecial;
+  special = atomKK->special;
+  num_bond = atomKK->num_bond;
+  bond_type = atomKK->bond_type;
+  bond_atom = atomKK->bond_atom;
+  num_angle = atomKK->num_angle;
+  angle_type = atomKK->angle_type;
+  angle_atom1 = atomKK->angle_atom1;
+  angle_atom2 = atomKK->angle_atom2;
+  angle_atom3 = atomKK->angle_atom3;
+  num_dihedral = atomKK->num_dihedral;
+  dihedral_type = atomKK->dihedral_type;
+  dihedral_atom1 = atomKK->dihedral_atom1;
+  dihedral_atom2 = atomKK->dihedral_atom2;
+  dihedral_atom3 = atomKK->dihedral_atom3;
+  dihedral_atom4 = atomKK->dihedral_atom4;
+  num_improper = atomKK->num_improper;
+  improper_type = atomKK->improper_type;
+  improper_atom1 = atomKK->improper_atom1;
+  improper_atom2 = atomKK->improper_atom2;
+  improper_atom3 = atomKK->improper_atom3;
+  improper_atom4 = atomKK->improper_atom4;
+
+  // d_view member of each dual view
+  d_tag = atomKK->k_tag.d_view;
+  d_type = atomKK->k_type.d_view;
+  d_mask = atomKK->k_mask.d_view;
+  d_image = atomKK->k_image.d_view;
+  d_x = atomKK->k_x.d_view;
+  d_v = atomKK->k_v.d_view;
+  d_f = atomKK->k_f.d_view;
+  d_molecule = atomKK->k_molecule.d_view;
+  d_nspecial = atomKK->k_nspecial.d_view;
+  d_special = atomKK->k_special.d_view;
+  d_num_bond = atomKK->k_num_bond.d_view;
+  d_bond_type = atomKK->k_bond_type.d_view;
+  d_bond_atom = atomKK->k_bond_atom.d_view;
+  d_num_angle = atomKK->k_num_angle.d_view;
+  d_angle_type = atomKK->k_angle_type.d_view;
+  d_angle_atom1 = atomKK->k_angle_atom1.d_view;
+  d_angle_atom2 = atomKK->k_angle_atom2.d_view;
+  d_angle_atom3 = atomKK->k_angle_atom3.d_view;
+  d_num_dihedral = atomKK->k_num_dihedral.d_view;
+  d_dihedral_type = atomKK->k_dihedral_type.d_view;
+  d_dihedral_atom1 = atomKK->k_dihedral_atom1.d_view;
+  d_dihedral_atom2 = atomKK->k_dihedral_atom2.d_view;
+  d_dihedral_atom3 = atomKK->k_dihedral_atom3.d_view;
+  d_dihedral_atom4 = atomKK->k_dihedral_atom4.d_view;
+  d_num_improper = atomKK->k_num_improper.d_view;
+  d_improper_type = atomKK->k_improper_type.d_view;
+  d_improper_atom1 = atomKK->k_improper_atom1.d_view;
+  d_improper_atom2 = atomKK->k_improper_atom2.d_view;
+  d_improper_atom3 = atomKK->k_improper_atom3.d_view;
+  d_improper_atom4 = atomKK->k_improper_atom4.d_view;
+
+  // h_view member of each dual view
+  h_tag = atomKK->k_tag.h_view;
+  h_type = atomKK->k_type.h_view;
+  h_mask = atomKK->k_mask.h_view;
+  h_image = atomKK->k_image.h_view;
+  h_x = atomKK->k_x.h_view;
+  h_v = atomKK->k_v.h_view;
+  h_f = atomKK->k_f.h_view;
+  h_molecule = atomKK->k_molecule.h_view;
+  h_nspecial = atomKK->k_nspecial.h_view;
+  h_special = atomKK->k_special.h_view;
+  h_num_bond = atomKK->k_num_bond.h_view;
+  h_bond_type = atomKK->k_bond_type.h_view;
+  h_bond_atom = atomKK->k_bond_atom.h_view;
+  h_num_angle = atomKK->k_num_angle.h_view;
+  h_angle_type = atomKK->k_angle_type.h_view;
+  h_angle_atom1 = atomKK->k_angle_atom1.h_view;
+  h_angle_atom2 = atomKK->k_angle_atom2.h_view;
+  h_angle_atom3 = atomKK->k_angle_atom3.h_view;
+  h_num_dihedral = atomKK->k_num_dihedral.h_view;
+  h_dihedral_type = atomKK->k_dihedral_type.h_view;
+  h_dihedral_atom1 = atomKK->k_dihedral_atom1.h_view;
+  h_dihedral_atom2 = atomKK->k_dihedral_atom2.h_view;
+  h_dihedral_atom3 = atomKK->k_dihedral_atom3.h_view;
+  h_dihedral_atom4 = atomKK->k_dihedral_atom4.h_view;
+  h_num_improper = atomKK->k_num_improper.h_view;
+  h_improper_type = atomKK->k_improper_type.h_view;
+  h_improper_atom1 = atomKK->k_improper_atom1.h_view;
+  h_improper_atom2 = atomKK->k_improper_atom2.h_view;
+  h_improper_atom3 = atomKK->k_improper_atom3.h_view;
+  h_improper_atom4 = atomKK->k_improper_atom4.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::copy(int i, int j, int delflag)
+{
+  int k;
+  // x and v are copied but f is not; all fields use h_* views except mask (raw pointer)
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];  // NOTE(review): the only field not going through its h_ view
+  h_image[j] = h_image[i];
+  h_x(j,0) = h_x(i,0);
+  h_x(j,1) = h_x(i,1);
+  h_x(j,2) = h_x(i,2);
+  h_v(j,0) = h_v(i,0);
+  h_v(j,1) = h_v(i,1);
+  h_v(j,2) = h_v(i,2);
+
+  h_molecule(j) = h_molecule(i);
+
+  h_num_bond(j) = h_num_bond(i);
+  for (k = 0; k < h_num_bond(j); k++) {  // count was just copied, so this is atom i's count
+    h_bond_type(j,k) = h_bond_type(i,k);
+    h_bond_atom(j,k) = h_bond_atom(i,k);
+  }
+
+  h_nspecial(j,0) = h_nspecial(i,0);
+  h_nspecial(j,1) = h_nspecial(i,1);
+  h_nspecial(j,2) = h_nspecial(i,2);
+  for (k = 0; k < h_nspecial(j,2); k++)  // loop bound is the third nspecial entry
+    h_special(j,k) = h_special(i,k);
+
+  h_num_angle(j) = h_num_angle(i);
+  for (k = 0; k < h_num_angle(j); k++) {
+    h_angle_type(j,k) = h_angle_type(i,k);
+    h_angle_atom1(j,k) = h_angle_atom1(i,k);
+    h_angle_atom2(j,k) = h_angle_atom2(i,k);
+    h_angle_atom3(j,k) = h_angle_atom3(i,k);
+  }
+
+  h_num_dihedral(j) = h_num_dihedral(i);
+  for (k = 0; k < h_num_dihedral(j); k++) {
+    h_dihedral_type(j,k) = h_dihedral_type(i,k);
+    h_dihedral_atom1(j,k) = h_dihedral_atom1(i,k);
+    h_dihedral_atom2(j,k) = h_dihedral_atom2(i,k);
+    h_dihedral_atom3(j,k) = h_dihedral_atom3(i,k);
+    h_dihedral_atom4(j,k) = h_dihedral_atom4(i,k);
+  }
+
+  h_num_improper(j) = h_num_improper(i);
+  for (k = 0; k < h_num_improper(j); k++) {
+    h_improper_type(j,k) = h_improper_type(i,k);
+    h_improper_atom1(j,k) = h_improper_atom1(i,k);
+    h_improper_atom2(j,k) = h_improper_atom2(i,k);
+    h_improper_atom3(j,k) = h_improper_atom3(i,k);
+    h_improper_atom4(j,k) = h_improper_atom4(i,k);
+  }
+  // forward the copy (including delflag) to every fix listed in atom->extra_grow
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecMolecularKokkos_PackComm {  // functor passed to Kokkos::parallel_for by pack_comm_kokkos
+  typedef DeviceType device_type;
+  // views, swap index, box lengths / cross terms and pbc values read by operator()
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  // takes the DeviceType views of x and list, sets up _buf and copies six pbc entries
+  AtomVecMolecularKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()
+				*buf.view<DeviceType>().dimension_1())/3;  // total entries / 3
+        const size_t elements = 3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);  // presumably makes _buf a maxsend x 3 view of buf - confirm
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];  // int pbc values stored as X_FLOAT
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+  // row i of _buf receives the coordinates of atom _list(_iswap,i), shifted per the template flags
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // no shift: plain copy
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift by pbc times the box length per dimension
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // additionally adds the _xy/_xz/_yz cross terms
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
+                                             const DAT::tdual_int_2d &list,
+                                             const int & iswap,
+                                             const DAT::tdual_xfloat_2d &buf,
+                                             const int &pbc_flag,
+                                             const int* const pbc)
+{
+  // Check whether to always run forward communication on the host
+  // Choose correct forward PackComm kernel
+  // functor template arguments are <space, PBC_FLAG, TRICLINIC>, picked from the runtime flags
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);  // sync x for host access before the kernel reads it
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,0>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,0>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPHostType::fence();  // fence on the host space after launching the kernel
+  } else {
+    sync(Device,X_MASK);  // same ladder as above, with LMPDeviceType
+    if(pbc_flag) {
+      if(domain->triclinic) {
+        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,0>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    } else {
+      if(domain->triclinic) {
+        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,0>
+          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+  // n atoms times size_forward values each (size_forward is set to 3 in the constructor)
+	return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecMolecularKokkos_PackCommSelf {  // like PackComm, but writes back into x instead of a buffer
+  typedef DeviceType device_type;
+  // _x and _xw are both built from x.view<DeviceType>(): one is read, the other written
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_x_array _xw;
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+
+  AtomVecMolecularKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+    _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz) {
+    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];  // int pbc values stored as X_FLOAT
+    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+  // row i+_nfirst of x receives the coordinates of atom _list(_iswap,i), shifted per the template flags
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // no shift: plain copy
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift by pbc times the box length per dimension
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // additionally adds the _xy/_xz/_yz cross terms
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+// Helper for pack_comm_self(): launch the self-copy kernel in execution
+// space DeviceType, choosing the functor specialization that matches the
+// runtime pbc_flag/triclinic values, then fence that execution space.
+template<class DeviceType>
+static void AtomVecMolecularKokkos_run_pack_comm_self(
+    const int n, const DAT::tdual_x_array &k_x, const int nfirst,
+    const DAT::tdual_int_2d &list, const int iswap,
+    const X_FLOAT xprd, const X_FLOAT yprd, const X_FLOAT zprd,
+    const X_FLOAT xy, const X_FLOAT xz, const X_FLOAT yz,
+    const int triclinic, const int pbc_flag, const int* const pbc)
+{
+  if (pbc_flag) {
+    if (triclinic) {
+      AtomVecMolecularKokkos_PackCommSelf<DeviceType,1,1>
+        kernel(k_x,nfirst,list,iswap,xprd,yprd,zprd,xy,xz,yz,pbc);
+      Kokkos::parallel_for(n,kernel);
+    } else {
+      AtomVecMolecularKokkos_PackCommSelf<DeviceType,1,0>
+        kernel(k_x,nfirst,list,iswap,xprd,yprd,zprd,xy,xz,yz,pbc);
+      Kokkos::parallel_for(n,kernel);
+    }
+  } else {
+    if (triclinic) {
+      AtomVecMolecularKokkos_PackCommSelf<DeviceType,0,1>
+        kernel(k_x,nfirst,list,iswap,xprd,yprd,zprd,xy,xz,yz,pbc);
+      Kokkos::parallel_for(n,kernel);
+    } else {
+      AtomVecMolecularKokkos_PackCommSelf<DeviceType,0,0>
+        kernel(k_x,nfirst,list,iswap,xprd,yprd,zprd,xy,xz,yz,pbc);
+      Kokkos::parallel_for(n,kernel);
+    }
+  }
+  DeviceType::fence();
+}
+
+/* forward communication to self: copy the positions of the n atoms in row
+   iswap of list into the slots starting at nfirst, on the host or on the
+   device depending on commKK->forward_comm_on_host; returns n*3 */
+int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                                           const int & iswap,
+                                           const int nfirst, const int &pbc_flag,
+                                           const int* const pbc) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    AtomVecMolecularKokkos_run_pack_comm_self<LMPHostType>(
+      n,atomKK->k_x,nfirst,list,iswap,
+      domain->xprd,domain->yprd,domain->zprd,
+      domain->xy,domain->xz,domain->yz,
+      domain->triclinic,pbc_flag,pbc);
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    AtomVecMolecularKokkos_run_pack_comm_self<LMPDeviceType>(
+      n,atomKK->k_x,nfirst,list,iswap,
+      domain->xprd,domain->yprd,domain->zprd,
+      domain->xy,domain->xz,domain->yz,
+      domain->triclinic,pbc_flag,pbc);
+  }
+  return n*3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that copies the three position components stored in row i of the
+// receive buffer into atom slot i+_first.
+template<class DeviceType>
+struct AtomVecMolecularKokkos_UnpackComm {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;            // destination positions
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;  // source buffer, read-only
+  int _first;                                               // first destination slot
+
+  AtomVecMolecularKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int dst = i + _first;
+    for (int d = 0; d < 3; d++)
+      _x(dst,d) = _buf(i,d);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack forward-communication positions from a Kokkos buffer into the n
+   atom slots starting at first.  The kernel runs on the host when
+   commKK->forward_comm_on_host is set and on the device otherwise; in both
+   cases the execution space that ran the kernel is fenced before returning */
+void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    // the kernel was dispatched to LMPHostType, so that is the space to
+    // fence (the other host-side kernels in this file do the same)
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the positions of the n atoms in list into buf for forward
+   communication; when pbc_flag is set a periodic shift derived from pbc
+   and the domain is added.  Returns the number of values written */
+int AtomVecMolecularKokkos::pack_comm(int n, int *list, double *buf,
+                                      int pbc_flag, int *pbc)
+{
+  int m = 0;
+
+  // no periodic shift requested: copy the coordinates unchanged
+  if (pbc_flag == 0) {
+    for (int ii = 0; ii < n; ii++) {
+      const int j = list[ii];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+    }
+    return m;
+  }
+
+  // shift applied to every packed atom
+  double shift[3];
+  if (domain->triclinic == 0) {
+    shift[0] = pbc[0]*domain->xprd;
+    shift[1] = pbc[1]*domain->yprd;
+    shift[2] = pbc[2]*domain->zprd;
+  } else {
+    shift[0] = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    shift[1] = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+    shift[2] = pbc[2]*domain->zprd;
+  }
+
+  for (int ii = 0; ii < n; ii++) {
+    const int j = list[ii];
+    buf[m++] = h_x(j,0) + shift[0];
+    buf[m++] = h_x(j,1) + shift[1];
+    buf[m++] = h_x(j,2) + shift[2];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack positions and velocities of the n atoms in list into buf for
+   forward communication (6 values per atom).
+   pbc_flag != 0 adds a periodic shift to the positions; if deform_vremap
+   is also set, atoms whose mask matches deform_groupbit additionally get
+   a velocity offset built from h_rate and pbc.
+   Returns the number of values written */
+int AtomVecMolecularKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                          int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        // group membership must be tested for the atom being packed (j),
+        // not for the position i within the send list
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack forward-communication positions: three consecutive values of buf
+   per atom are stored into atom slots first .. first+n-1 */
+void AtomVecMolecularKokkos::unpack_comm(int n, int first, double *buf)
+{
+  const int last = first + n;
+  const double *src = buf;
+
+  for (int i = first; i < last; i++, src += 3) {
+    h_x(i,0) = src[0];
+    h_x(i,1) = src[1];
+    h_x(i,2) = src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack forward-communication positions and velocities: six consecutive
+   values of buf per atom (x,y,z then vx,vy,vz) are stored into atom slots
+   first .. first+n-1 */
+void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  const int last = first + n;
+  const double *src = buf;
+
+  for (int i = first; i < last; i++, src += 6) {
+    h_x(i,0) = src[0];
+    h_x(i,1) = src[1];
+    h_x(i,2) = src[2];
+    h_v(i,0) = src[3];
+    h_v(i,1) = src[4];
+    h_v(i,2) = src[5];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the forces of atoms first .. first+n-1 into buf for reverse
+   communication; the host copy of the forces is synced first when there is
+   anything to pack.  Returns the number of values written */
+int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
+{
+  if (n > 0) sync(Host,F_MASK);
+
+  int m = 0;
+  for (int k = 0; k < n; k++) {
+    const int i = first + k;
+    buf[m++] = h_f(i,0);
+    buf[m++] = h_f(i,1);
+    buf[m++] = h_f(i,2);
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack reverse-communication forces: three consecutive values of buf per
+   entry are accumulated onto the forces of the n atoms in list; the host
+   copy of the forces is flagged as modified when there is anything to add */
+void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  if (n > 0) modified(Host,F_MASK);
+
+  const double *src = buf;
+  for (int k = 0; k < n; k++, src += 3) {
+    const int j = list[k];
+    h_f(j,0) += src[0];
+    h_f(j,1) += src[1];
+    h_f(j,2) += src[2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that packs one border atom per buffer row: columns 0-2 hold the
+// position (shifted by _dx,_dy,_dz when PBC_FLAG != 0), columns 3-6 hold
+// tag, type, mask and molecule ID, each stored by value conversion.
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecMolecularKokkos_PackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_xfloat_2d _buf;                 // destination buffer
+  const typename AT::t_int_2d_const _list;       // send lists, addressed as (swap,entry)
+  const int _iswap;                              // swap (row of _list) to pack
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const typename AT::t_tagint_1d _molecule;
+  X_FLOAT _dx,_dy,_dz;                           // position shift, used only if PBC_FLAG != 0
+
+  AtomVecMolecularKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const typename AT::t_tagint_1d &molecule,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),_iswap(iswap),
+      _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+
+    // position: only this part depends on the periodic flag
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    }
+
+    // per-atom integer fields, identical for both cases
+    _buf(i,3) = _tag(j);
+    _buf(i,4) = _type(j);
+    _buf(i,5) = _mask(j);
+    _buf(i,6) = _molecule(j);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the n border atoms of swap iswap from k_sendlist into buf using a
+   Kokkos kernel in the requested execution space (Host, otherwise device).
+   With pbc_flag set, positions are shifted by pbc[0..2], scaled by the box
+   lengths for a non-triclinic domain.  Returns n*size_border */
+int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                               DAT::tdual_xfloat_2d buf,int iswap,
+                                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  if (pbc_flag == 0) {
+    // no periodic shift: launch the PBC_FLAG=0 specialization
+    dx = dy = dz = 0;
+    if (space != Host) {
+      AtomVecMolecularKokkos_PackBorder<LMPDeviceType,0> packer(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+      LMPDeviceType::fence();
+    } else {
+      AtomVecMolecularKokkos_PackBorder<LMPHostType,0> packer(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+      LMPHostType::fence();
+    }
+    return n*size_border;
+  }
+
+  // periodic shift: box lengths scale pbc only for a non-triclinic domain
+  if (domain->triclinic == 0) {
+    dx = pbc[0]*domain->xprd;
+    dy = pbc[1]*domain->yprd;
+    dz = pbc[2]*domain->zprd;
+  } else {
+    dx = pbc[0];
+    dy = pbc[1];
+    dz = pbc[2];
+  }
+
+  if (space != Host) {
+    AtomVecMolecularKokkos_PackBorder<LMPDeviceType,1> packer(
+      buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+      iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+    Kokkos::parallel_for(n,packer);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecMolecularKokkos_PackBorder<LMPHostType,1> packer(
+      buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+      iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+    Kokkos::parallel_for(n,packer);
+    LMPHostType::fence();
+  }
+  return n*size_border;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the n border atoms in list into buf: position (shifted when
+   pbc_flag is set) followed by tag, type, mask and molecule ID encoded via
+   ubuf, then whatever the fixes listed in atom->extra_border append.
+   Returns the number of values written */
+int AtomVecMolecularKokkos::pack_border(int n, int *list, double *buf,
+                                        int pbc_flag, int *pbc)
+{
+  int m = 0;
+
+  if (pbc_flag != 0) {
+    // box lengths scale pbc only for a non-triclinic domain
+    double shift[3];
+    if (domain->triclinic == 0) {
+      shift[0] = pbc[0]*domain->xprd;
+      shift[1] = pbc[1]*domain->yprd;
+      shift[2] = pbc[2]*domain->zprd;
+    } else {
+      shift[0] = pbc[0];
+      shift[1] = pbc[1];
+      shift[2] = pbc[2];
+    }
+    for (int ii = 0; ii < n; ii++) {
+      const int j = list[ii];
+      buf[m++] = h_x(j,0) + shift[0];
+      buf[m++] = h_x(j,1) + shift[1];
+      buf[m++] = h_x(j,2) + shift[2];
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  } else {
+    for (int ii = 0; ii < n; ii++) {
+      const int j = list[ii];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+    }
+  }
+
+  // let each registered fix append its own border data
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the n border atoms in list into buf including velocities:
+   position (shifted when pbc_flag is set), tag, type, mask and molecule ID
+   encoded via ubuf, then the velocity.  If pbc_flag and deform_vremap are
+   both set, atoms whose mask matches deform_groupbit get a velocity offset
+   built from h_rate and pbc.  Fixes listed in atom->extra_border append
+   their data last.  Returns the number of values written */
+int AtomVecMolecularKokkos::pack_border_vel(int n, int *list, double *buf,
+                                            int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = ubuf(h_tag(j)).d;
+      buf[m++] = ubuf(h_type(j)).d;
+      buf[m++] = ubuf(h_mask(j)).d;
+      buf[m++] = ubuf(h_molecule(j)).d;
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    // box lengths scale pbc only for a non-triclinic domain
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = ubuf(h_tag(j)).d;
+        buf[m++] = ubuf(h_type(j)).d;
+        buf[m++] = ubuf(h_mask(j)).d;
+        buf[m++] = ubuf(h_molecule(j)).d;
+        // group membership must be tested for the atom being packed (j),
+        // not for the position i within the send list
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+
+  // let each registered fix append its own border data
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the molecule ID of each of the n atoms in list into buf (one value
+   per atom) and return the number of values written.
+   The ID is encoded with ubuf so that it round-trips through
+   unpack_border_hybrid(), which decodes with ubuf(...).i */
+int AtomVecMolecularKokkos::pack_border_hybrid(int n, int *list, double *buf)
+{
+  int i,j,m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = ubuf(h_molecule(j)).d;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor that unpacks one border atom per buffer row into atom slot
+// i+_first: columns 0-2 are the position, columns 3-6 are tag, type, mask
+// and molecule ID stored as floating-point values.
+template<class DeviceType>
+struct AtomVecMolecularKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;   // source buffer, read-only
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_tagint_1d _molecule;
+  int _first;                                  // first destination slot
+
+
+  AtomVecMolecularKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      typename AT::t_tagint_1d &molecule,
+      const int& first):
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
+    _first(first){
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+      // tag and molecule live in tagint views: convert to tagint, not int,
+      // so IDs are not truncated when tagint is wider than int
+      _tag(i+_first) = static_cast<tagint> (_buf(i,3));
+      _type(i+_first) = static_cast<int>  (_buf(i,4));
+      _mask(i+_first) = static_cast<int>  (_buf(i,5));
+      _molecule(i+_first) = static_cast<tagint> (_buf(i,6));
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack n border atoms from a Kokkos buffer into atom slots starting at
+   first, growing the per-atom arrays while first+n >= nmax, and run the
+   unpack kernel in the requested execution space (Host, otherwise device) */
+void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first,
+                                                  const DAT::tdual_xfloat_2d &buf,
+                                                  ExecutionSpace space) {
+  // the modified flags are set both before and after the grow loop
+  // NOTE(review): presumably grow() syncs and/or resets these flags - confirm
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+  while (first+n >= nmax) grow(0);
+  modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+
+  if (space != Host) {
+    AtomVecMolecularKokkos_UnpackBorder<LMPDeviceType>
+      unpacker(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
+    Kokkos::parallel_for(n,unpacker);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
+      unpacker(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
+    Kokkos::parallel_for(n,unpacker);
+    LMPHostType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack n border atoms from buf into atom slots first .. first+n-1:
+   position, then tag, type, mask and molecule ID decoded via ubuf; the
+   per-atom arrays are grown when slot nmax is reached.  Fixes listed in
+   atom->extra_border then unpack their own data from the rest of buf */
+void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf)
+{
+  int m = 0;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; iatom++) {
+    if (iatom == nmax) grow(0);
+    // flagged for every atom, after any grow, and so also before a later one
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+    h_x(iatom,0) = buf[m++];
+    h_x(iatom,1) = buf[m++];
+    h_x(iatom,2) = buf[m++];
+    h_tag(iatom) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(iatom) = (int) ubuf(buf[m++]).i;
+    h_mask(iatom) = (int) ubuf(buf[m++]).i;
+    h_molecule(iatom) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  // hand the remaining buffer to each registered fix in turn
+  for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+    m += modify->fix[atom->extra_border[iextra]]->
+      unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack n border atoms including velocities from buf into atom slots
+   first .. first+n-1: position, tag, type, mask and molecule ID decoded
+   via ubuf, then velocity; the per-atom arrays are grown when slot nmax is
+   reached.  Fixes listed in atom->extra_border then unpack their own data
+   from the rest of buf */
+void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // flag the host copies written below as modified, as unpack_border()
+    // does, with V_MASK added for the velocities
+    // NOTE(review): presumably needed so a later grow() keeps these host
+    // writes - confirm against grow()
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag(i) =  (tagint)  ubuf(buf[m++]).i;
+    h_type(i) = (int) ubuf(buf[m++]).i;
+    h_mask(i) = (int) ubuf(buf[m++]).i;
+    h_molecule(i) = (tagint) ubuf(buf[m++]).i;
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+
+  // hand the remaining buffer to each registered fix in turn
+  if (atom->nextra_border)
+    for (int iextra = 0; iextra < atom->nextra_border; iextra++)
+      m += modify->fix[atom->extra_border[iextra]]->
+        unpack_border(n,first,&buf[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* unpack one ubuf-encoded molecule ID per atom from buf into atom slots
+   first .. first+n-1; returns the number of values consumed */
+int AtomVecMolecularKokkos::unpack_border_hybrid(int n, int first, double *buf)
+{
+  int m = 0;
+  while (m < n) {
+    h_molecule(first+m) = (tagint) ubuf(buf[m]).i;
+    m++;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Functor used by pack_exchange_kokkos().  For each entry mysend of the
+// send list it
+//   1. writes the full per-atom record of atom i = _sendlist(mysend) into
+//      row mysend of _buf (column 0 holds the value of `elements`), and
+//   2. if j = _copylist(mysend) is > -1, overwrites atom i with the data
+//      of atom j.
+// All integer quantities are stored in the buffer by value conversion.
+template<class DeviceType>
+struct AtomVecMolecularKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  // read handles on the per-atom data
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_imageint_1d_randomread _image;
+  typename AT::t_tagint_1d_randomread _molecule;
+  typename AT::t_int_2d_randomread _nspecial;
+  typename AT::t_tagint_2d_randomread _special;
+  typename AT::t_int_1d_randomread _num_bond;
+  typename AT::t_int_2d_randomread _bond_type;
+  typename AT::t_tagint_2d_randomread _bond_atom;
+  typename AT::t_int_1d_randomread _num_angle;
+  typename AT::t_int_2d_randomread _angle_type;
+  typename AT::t_tagint_2d_randomread _angle_atom1,_angle_atom2,_angle_atom3;
+  typename AT::t_int_1d_randomread _num_dihedral;
+  typename AT::t_int_2d_randomread _dihedral_type;
+  typename AT::t_tagint_2d_randomread _dihedral_atom1,_dihedral_atom2,
+    _dihedral_atom3,_dihedral_atom4;
+  typename AT::t_int_1d_randomread _num_improper;
+  typename AT::t_int_2d_randomread _improper_type;
+  typename AT::t_tagint_2d_randomread _improper_atom1,_improper_atom2,
+    _improper_atom3,_improper_atom4;
+  // write handles ("w" suffix), initialized from the same dual views as
+  // the read handles above
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_imageint_1d _imagew;
+  typename AT::t_tagint_1d _moleculew;
+  typename AT::t_int_2d _nspecialw;
+  typename AT::t_tagint_2d _specialw;
+  typename AT::t_int_1d _num_bondw;
+  typename AT::t_int_2d _bond_typew;
+  typename AT::t_tagint_2d _bond_atomw;
+  typename AT::t_int_1d _num_anglew;
+  typename AT::t_int_2d _angle_typew;
+  typename AT::t_tagint_2d _angle_atom1w,_angle_atom2w,_angle_atom3w;
+  typename AT::t_int_1d _num_dihedralw;
+  typename AT::t_int_2d _dihedral_typew;
+  typename AT::t_tagint_2d _dihedral_atom1w,_dihedral_atom2w,
+    _dihedral_atom3w,_dihedral_atom4w;
+  typename AT::t_int_1d _num_improperw;
+  typename AT::t_int_2d _improper_typew;
+  typename AT::t_tagint_2d _improper_atom1w,_improper_atom2w,
+    _improper_atom3w,_improper_atom4w;
+  // exchange buffer, one row per sent atom
+  typename AT::t_xfloat_2d_um _buf;
+  // atoms to send, and for each the atom to copy into its slot (or < 0)
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+  // stored from the constructor arguments; not read by operator()
+  int _nlocal,_dim;
+  X_FLOAT _lo,_hi;
+  // value written to column 0 of every buffer row (see constructor)
+  size_t elements;
+
+  AtomVecMolecularKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improper(atom->k_num_improper.view<DeviceType>()),
+    _improper_type(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
+    _xw(atom->k_x.view<DeviceType>()),
+    _vw(atom->k_v.view<DeviceType>()),
+    _tagw(atom->k_tag.view<DeviceType>()),
+    _typew(atom->k_type.view<DeviceType>()),
+    _maskw(atom->k_mask.view<DeviceType>()),
+    _imagew(atom->k_image.view<DeviceType>()),
+    _moleculew(atom->k_molecule.view<DeviceType>()),
+    _nspecialw(atom->k_nspecial.view<DeviceType>()),
+    _specialw(atom->k_special.view<DeviceType>()),
+    _num_bondw(atom->k_num_bond.view<DeviceType>()),
+    _bond_typew(atom->k_bond_type.view<DeviceType>()),
+    _bond_atomw(atom->k_bond_atom.view<DeviceType>()),
+    _num_anglew(atom->k_num_angle.view<DeviceType>()),
+    _angle_typew(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1w(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedralw(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_typew(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1w(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2w(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3w(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4w(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improperw(atom->k_num_improper.view<DeviceType>()),
+    _improper_typew(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1w(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2w(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _copylist(copylist.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _lo(lo),_hi(hi){
+    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
+    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
+    // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
+    // and angle_atom3
+    // 1 num_dihedral, dihedral_per_atom dihedral_type, 4*dihedral_per_atom
+    // 1 num_improper, 5*improper_per_atom
+    // 1 to store buffer length
+    elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+    // number of `elements`-sized records that fit in buf's total storage
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements;
+    // NOTE(review): buffer_view presumably makes _buf address buf's storage
+    // as maxsendlist rows of `elements` columns - confirm against its definition
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    int k;
+    const int i = _sendlist(mysend);
+    // column 0: record length; the per-atom data starts at column 1
+    _buf(mysend,0) = elements;
+    int m = 1;
+    _buf(mysend,m++) = _x(i,0);
+    _buf(mysend,m++) = _x(i,1);
+    _buf(mysend,m++) = _x(i,2);
+    _buf(mysend,m++) = _v(i,0);
+    _buf(mysend,m++) = _v(i,1);
+    _buf(mysend,m++) = _v(i,2);
+    _buf(mysend,m++) = _tag(i);
+    _buf(mysend,m++) = _type(i);
+    _buf(mysend,m++) = _mask(i);
+    _buf(mysend,m++) = _image(i);
+    _buf(mysend,m++) = _molecule(i);
+    // each topology section is a count followed by that many entries
+    _buf(mysend,m++) = _num_bond(i);
+    for (k = 0; k < _num_bond(i); k++) {
+      _buf(mysend,m++) = _bond_type(i,k);
+      _buf(mysend,m++) = _bond_atom(i,k);
+    }
+    _buf(mysend,m++) = _num_angle(i);
+    for (k = 0; k < _num_angle(i); k++) {
+      _buf(mysend,m++) = _angle_type(i,k);
+      _buf(mysend,m++) = _angle_atom1(i,k);
+      _buf(mysend,m++) = _angle_atom2(i,k);
+      _buf(mysend,m++) = _angle_atom3(i,k);
+    }
+    _buf(mysend,m++) = _num_dihedral(i);
+    for (k = 0; k < _num_dihedral(i); k++) {
+      _buf(mysend,m++) = _dihedral_type(i,k);
+      _buf(mysend,m++) = _dihedral_atom1(i,k);
+      _buf(mysend,m++) = _dihedral_atom2(i,k);
+      _buf(mysend,m++) = _dihedral_atom3(i,k);
+      _buf(mysend,m++) = _dihedral_atom4(i,k);
+    }
+    _buf(mysend,m++) = _num_improper(i);
+    for (k = 0; k < _num_improper(i); k++) {
+      _buf(mysend,m++) = _improper_type(i,k);
+      _buf(mysend,m++) = _improper_atom1(i,k);
+      _buf(mysend,m++) = _improper_atom2(i,k);
+      _buf(mysend,m++) = _improper_atom3(i,k);
+      _buf(mysend,m++) = _improper_atom4(i,k);
+    }
+
+    // the three nspecial counters, then the first nspecial(i,2) special IDs
+    _buf(mysend,m++) = _nspecial(i,0);
+    _buf(mysend,m++) = _nspecial(i,1);
+    _buf(mysend,m++) = _nspecial(i,2);
+    for (k = 0; k < _nspecial(i,2); k++)
+      _buf(mysend,m++) = _special(i,k);
+
+    const int j = _copylist(mysend);
+
+    // a copy-list entry > -1 names the atom whose data replaces atom i
+    if(j>-1) {
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw(i) = _tag(j);
+    _typew(i) = _type(j);
+    _maskw(i) = _mask(j);
+    _imagew(i) = _image(j);
+    _moleculew(i) = _molecule(j);
+    _num_bondw(i) = _num_bond(j);
+    for (k = 0; k < _num_bond(j); k++) {
+      _bond_typew(i,k) = _bond_type(j,k);
+      _bond_atomw(i,k) = _bond_atom(j,k);
+    }
+    _num_anglew(i) = _num_angle(j);
+    for (k = 0; k < _num_angle(j); k++) {
+      _angle_typew(i,k) = _angle_type(j,k);
+      _angle_atom1w(i,k) = _angle_atom1(j,k);
+      _angle_atom2w(i,k) = _angle_atom2(j,k);
+      _angle_atom3w(i,k) = _angle_atom3(j,k);
+    }
+    _num_dihedralw(i) = _num_dihedral(j);
+    for (k = 0; k < _num_dihedral(j); k++) {
+      _dihedral_typew(i,k) = _dihedral_type(j,k);
+      _dihedral_atom1w(i,k) = _dihedral_atom1(j,k);
+      _dihedral_atom2w(i,k) = _dihedral_atom2(j,k);
+      _dihedral_atom3w(i,k) = _dihedral_atom3(j,k);
+      _dihedral_atom4w(i,k) = _dihedral_atom4(j,k);
+    }
+    _num_improperw(i) = _num_improper(j);
+    for (k = 0; k < _num_improper(j); k++) {
+      _improper_typew(i,k) = _improper_type(j,k);
+      _improper_atom1w(i,k) = _improper_atom1(j,k);
+      _improper_atom2w(i,k) = _improper_atom2(j,k);
+      _improper_atom3w(i,k) = _improper_atom3(j,k);
+      _improper_atom4w(i,k) = _improper_atom4(j,k);
+    }
+    _nspecialw(i,0) = _nspecial(j,0);
+    _nspecialw(i,1) = _nspecial(j,1);
+    _nspecialw(i,2) = _nspecial(j,2);
+    for (k = 0; k < _nspecial(j,2); k++)
+      _specialw(i,k) = _special(j,k);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+/* pack the nsend atoms in k_sendlist into k_buf with a Kokkos kernel in the
+   requested execution space (Host, otherwise device), resizing k_buf first
+   if it cannot hold nsend records.  Returns nsend*elements */
+int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf,
+                                                 DAT::tdual_int_1d k_sendlist,
+                                                 DAT::tdual_int_1d k_copylist,
+                                                 ExecutionSpace space,int dim,X_FLOAT lo,
+                                                 X_FLOAT hi )
+{
+  // size of one per-atom record; same formula as in the pack functor
+  const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+
+  // number of whole records the current buffer can hold
+  const int capacity = (int) (k_buf.view<LMPHostType>().dimension_0()*
+                              k_buf.view<LMPHostType>().dimension_1())/elements;
+  if (nsend > capacity) {
+    // add rows, keeping the current number of columns
+    int newsize = nsend*elements/k_buf.view<LMPHostType>().dimension_1()+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+
+  if (space != Host) {
+    AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+    LMPHostType::fence();
+  }
+  return nsend*elements;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_exchange(int i, double *buf)
+{ // serialize atom i from the host views into buf; returns the number of values written
+  int k;
+  int m = 1; // buf[0] is filled in last with the total count
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+  buf[m++] = ubuf(h_tag(i)).d; // integer fields are stored through ubuf
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = ubuf(h_molecule(i)).d;
+  // topology lists: each count is written first, followed by that many entries
+  buf[m++] = ubuf(h_num_bond(i)).d;
+  for (k = 0; k < h_num_bond(i); k++) {
+    buf[m++] = ubuf(h_bond_type(i,k)).d;
+    buf[m++] = ubuf(h_bond_atom(i,k)).d;
+  }
+  buf[m++] = ubuf(h_num_angle(i)).d;
+  for (k = 0; k < h_num_angle(i); k++) {
+    buf[m++] = ubuf(h_angle_type(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom1(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom2(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom3(i,k)).d;
+  }
+  buf[m++] = ubuf(h_num_dihedral(i)).d;
+  for (k = 0; k < h_num_dihedral(i); k++) {
+    buf[m++] = ubuf(h_dihedral_type(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom1(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom2(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom3(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom4(i,k)).d;
+  }
+  buf[m++] = ubuf(h_num_improper(i)).d;
+  for (k = 0; k < h_num_improper(i); k++) {
+    buf[m++] = ubuf(h_improper_type(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom1(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom2(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom3(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom4(i,k)).d;
+  }
+  buf[m++] = ubuf(h_nspecial(i,0)).d;
+  buf[m++] = ubuf(h_nspecial(i,1)).d;
+  buf[m++] = ubuf(h_nspecial(i,2)).d;
+  for (k = 0; k < h_nspecial(i,2); k++) // nspecial(i,2) bounds the special list
+    buf[m++] = ubuf(h_special(i,k)).d;
+  // each fix listed in atom->extra_grow appends its own data and reports how much
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  buf[0] = m; // total length, stored as a plain double
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecMolecularKokkos_UnpackExchangeFunctor { // copies accepted exchange records from a buffer into the atom views
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_imageint_1d _image;
+  typename AT::t_tagint_1d _molecule;
+  typename AT::t_int_2d _nspecial;
+  typename AT::t_tagint_2d _special;
+  typename AT::t_int_1d _num_bond;
+  typename AT::t_int_2d _bond_type;
+  typename AT::t_tagint_2d _bond_atom;
+  typename AT::t_int_1d _num_angle;
+  typename AT::t_int_2d _angle_type;
+  typename AT::t_tagint_2d _angle_atom1,_angle_atom2,_angle_atom3;
+  typename AT::t_int_1d _num_dihedral;
+  typename AT::t_int_2d _dihedral_type;
+  typename AT::t_tagint_2d _dihedral_atom1,_dihedral_atom2,
+    _dihedral_atom3,_dihedral_atom4;
+  typename AT::t_int_1d _num_improper;
+  typename AT::t_int_2d _improper_type;
+  typename AT::t_tagint_2d _improper_atom1,_improper_atom2,
+    _improper_atom3,_improper_atom4;
+  // receive buffer, one-element atom counter, and the [_lo,_hi) test applied to coordinate _dim
+  typename AT::t_xfloat_2d_um _buf;
+  typename AT::t_int_1d _nlocal;
+  int _dim;
+  X_FLOAT _lo,_hi;
+  size_t elements; // buffer values per atom record, computed in the constructor
+
+  AtomVecMolecularKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+    _x(atom->k_x.view<DeviceType>()),
+    _v(atom->k_v.view<DeviceType>()),
+    _tag(atom->k_tag.view<DeviceType>()),
+    _type(atom->k_type.view<DeviceType>()),
+    _mask(atom->k_mask.view<DeviceType>()),
+    _image(atom->k_image.view<DeviceType>()),
+    _molecule(atom->k_molecule.view<DeviceType>()),
+    _nspecial(atom->k_nspecial.view<DeviceType>()),
+    _special(atom->k_special.view<DeviceType>()),
+    _num_bond(atom->k_num_bond.view<DeviceType>()),
+    _bond_type(atom->k_bond_type.view<DeviceType>()),
+    _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+    _num_angle(atom->k_num_angle.view<DeviceType>()),
+    _angle_type(atom->k_angle_type.view<DeviceType>()),
+    _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
+    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
+    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
+    _num_dihedral(atom->k_num_dihedral.view<DeviceType>()),
+    _dihedral_type(atom->k_dihedral_type.view<DeviceType>()),
+    _dihedral_atom1(atom->k_dihedral_atom1.view<DeviceType>()),
+    _dihedral_atom2(atom->k_dihedral_atom2.view<DeviceType>()),
+    _dihedral_atom3(atom->k_dihedral_atom3.view<DeviceType>()),
+    _dihedral_atom4(atom->k_dihedral_atom4.view<DeviceType>()),
+    _num_improper(atom->k_num_improper.view<DeviceType>()),
+    _improper_type(atom->k_improper_type.view<DeviceType>()),
+    _improper_atom1(atom->k_improper_atom1.view<DeviceType>()),
+    _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
+    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
+    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
+    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
+    _lo(lo),_hi(hi){
+    // record width uses the same formula as the exchange pack/unpack drivers in this file
+    elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+			     buf.template view<DeviceType>().dimension_1())/elements; // whole records that fit in buf
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); // NOTE(review): presumably makes _buf a maxsendlist x elements view of buf - confirm
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const { // unpack record myrecv if its coordinate lies in [_lo,_hi)
+    X_FLOAT x = _buf(myrecv,_dim+1); // slots 1..3 hold x, so _dim+1 is coordinate _dim
+    if (x >= _lo && x < _hi) {
+      int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); // take the next atom index from the shared counter
+      int m = 1; // read cursor; slot 0 of the record is not read here
+      _x(i,0) = _buf(myrecv,m++);
+      _x(i,1) = _buf(myrecv,m++);
+      _x(i,2) = _buf(myrecv,m++);
+      _v(i,0) = _buf(myrecv,m++);
+      _v(i,1) = _buf(myrecv,m++);
+      _v(i,2) = _buf(myrecv,m++);
+      _tag(i) = _buf(myrecv,m++);
+      _type(i) = _buf(myrecv,m++);
+      _mask(i) = _buf(myrecv,m++);
+      _image(i) = _buf(myrecv,m++);
+      // each topology count is read first and then bounds the loop that follows it
+      _molecule(i) = _buf(myrecv,m++);
+      _num_bond(i) = _buf(myrecv,m++);
+      int k;
+      for (k = 0; k < _num_bond(i); k++) {
+        _bond_type(i,k) = _buf(myrecv,m++);
+        _bond_atom(i,k) = _buf(myrecv,m++);
+      }
+      _num_angle(i) =  _buf(myrecv,m++);
+      for (k = 0; k < _num_angle(i); k++) {
+        _angle_type(i,k) = _buf(myrecv,m++);
+        _angle_atom1(i,k) = _buf(myrecv,m++);
+        _angle_atom2(i,k) = _buf(myrecv,m++);
+        _angle_atom3(i,k) = _buf(myrecv,m++);
+      }
+      _num_dihedral(i) =  _buf(myrecv,m++);
+      for (k = 0; k < _num_dihedral(i); k++) {
+        _dihedral_type(i,k) = _buf(myrecv,m++);
+        _dihedral_atom1(i,k) = _buf(myrecv,m++);
+        _dihedral_atom2(i,k) = _buf(myrecv,m++);
+        _dihedral_atom3(i,k) = _buf(myrecv,m++);
+        _dihedral_atom4(i,k) = _buf(myrecv,m++);
+      }
+      _num_improper(i) =  _buf(myrecv,m++);
+      for (k = 0; k < _num_improper(i); k++) {
+        _improper_type(i,k) = _buf(myrecv,m++);
+        _improper_atom1(i,k) = _buf(myrecv,m++);
+        _improper_atom2(i,k) = _buf(myrecv,m++);
+        _improper_atom3(i,k) = _buf(myrecv,m++);
+        _improper_atom4(i,k) = _buf(myrecv,m++);
+      }
+      _nspecial(i,0) = _buf(myrecv,m++);
+      _nspecial(i,1) = _buf(myrecv,m++);
+      _nspecial(i,2) = _buf(myrecv,m++);
+      for (k = 0; k < _nspecial(i,2); k++) // nspecial(i,2) bounds the special list
+        _special(i,k) = _buf(myrecv,m++);
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
+                                                   int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
+                                                   ExecutionSpace space) { // unpack nrecv/elements records; returns the updated atom count
+  const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
+    5*atom->dihedral_per_atom + 5*atom->improper_per_atom; // buffer values per atom record
+  if(space == Host) {
+    k_count.h_view(0) = nlocal; // counter starts at the current atom count
+    AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPHostType::fence();
+    return k_count.h_view(0);
+  } else {
+    k_count.h_view(0) = nlocal;
+    k_count.modify<LMPHostType>(); // push the host-side seed value to the device copy
+    k_count.sync<LMPDeviceType>();
+    AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      f(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/elements,f);
+    LMPDeviceType::fence();
+    k_count.modify<LMPDeviceType>(); // pull the incremented counter back to the host
+    k_count.sync<LMPHostType>();
+
+    return k_count.h_view(0);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::unpack_exchange(double *buf)
+{ // append one atom read from buf at index atom->nlocal; returns the number of values consumed
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
+	   ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
+  // the host views written below are flagged as modified above
+  int k;
+  int m = 1; // buf[0] is skipped; reading starts at slot 1
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; // integer fields are recovered through ubuf
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+  // topology lists: each count is read first and bounds the loop that follows
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_dihedral(nlocal); k++) {
+    h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_num_improper(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_improper(nlocal); k++) {
+    h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  h_nspecial(nlocal,0) = (int) ubuf(buf[m++]).i;
+  h_nspecial(nlocal,1) = (int) ubuf(buf[m++]).i;
+  h_nspecial(nlocal,2) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_nspecial(nlocal,2); k++) // nspecial(nlocal,2) bounds the special list
+   h_special(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  // each fix listed in atom->extra_grow consumes its own data and reports how much
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->
+        unpack_exchange(nlocal,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   size of restart data for all atoms owned by this proc
+   include extra data stored by fixes
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::size_restart()
+{
+  const int natoms = atom->nlocal;
+
+  // 16 fixed values per atom plus 2 per bond, 4 per angle, 5 per dihedral and improper
+  int total = 0;
+  for (int ia = 0; ia < natoms; ia++) {
+    total += 16 + 2*num_bond[ia] + 4*num_angle[ia] +
+      5*num_dihedral[ia] + 5*num_improper[ia];
+  }
+  // add what each fix in atom->extra_restart reports for every owned atom
+  for (int ifix = 0; ifix < atom->nextra_restart; ifix++)
+    for (int ia = 0; ia < natoms; ia++)
+      total += modify->fix[atom->extra_restart[ifix]]->size_restart(ia);
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   molecular types may be negative, but write as positive
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_restart(int i, double *buf)
+{ // serialize atom i for a restart file; returns the number of values written
+  int m = 1; // buf[0] is filled in last with the total count
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = ubuf(h_tag(i)).d;
+  buf[m++] = ubuf(h_type(i)).d;
+  buf[m++] = ubuf(h_mask(i)).d;
+  buf[m++] = ubuf(h_image(i)).d;
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+
+  buf[m++] = ubuf(h_molecule(i)).d;
+  // topology lists: count first, then entries; MAX(t,-t) stores each type as its absolute value
+  buf[m++] = ubuf(h_num_bond(i)).d;
+  for (int k = 0; k < h_num_bond(i); k++) {
+    buf[m++] = ubuf(MAX(h_bond_type(i,k),-h_bond_type(i,k))).d;
+    buf[m++] = ubuf(h_bond_atom(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_angle(i)).d;
+  for (int k = 0; k < h_num_angle(i); k++) {
+    buf[m++] = ubuf(MAX(h_angle_type(i,k),-h_angle_type(i,k))).d;
+    buf[m++] = ubuf(h_angle_atom1(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom2(i,k)).d;
+    buf[m++] = ubuf(h_angle_atom3(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_dihedral(i)).d;
+  for (int k = 0; k < h_num_dihedral(i); k++) {
+    buf[m++] = ubuf(MAX(h_dihedral_type(i,k),-h_dihedral_type(i,k))).d;
+    buf[m++] = ubuf(h_dihedral_atom1(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom2(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom3(i,k)).d;
+    buf[m++] = ubuf(h_dihedral_atom4(i,k)).d;
+  }
+
+  buf[m++] = ubuf(h_num_improper(i)).d;
+  for (int k = 0; k < h_num_improper(i); k++) {
+    buf[m++] = ubuf(MAX(h_improper_type(i,k),-h_improper_type(i,k))).d;
+    buf[m++] = ubuf(h_improper_atom1(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom2(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom3(i,k)).d;
+    buf[m++] = ubuf(h_improper_atom4(i,k)).d;
+  }
+  // each fix listed in atom->extra_restart appends its own data and reports how much
+  if (atom->nextra_restart)
+    for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+      m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m; // total length, stored as a plain double (not through ubuf)
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::unpack_restart(double *buf)
+{ // append one atom read from a restart record at index atom->nlocal; returns values consumed
+  int k;
+
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    if (atom->nextra_store) // keep atom->extra as long as the per-atom arrays
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+  // field order mirrors pack_restart(): x, tag, type, mask, image, v, molecule, topology
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
+  h_type(nlocal) = (int) ubuf(buf[m++]).i;
+  h_mask(nlocal) = (int) ubuf(buf[m++]).i;
+  h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  h_molecule(nlocal) = (tagint) ubuf(buf[m++]).i;
+  // topology lists: each count is read first and bounds the loop that follows
+  h_num_bond(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_bond(nlocal); k++) {
+    h_bond_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_bond_atom(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_angle(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_angle(nlocal); k++) {
+    h_angle_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_angle_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_angle_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_dihedral(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_dihedral(nlocal); k++) {
+    h_dihedral_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_dihedral_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_dihedral_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+
+  h_num_improper(nlocal) = (int) ubuf(buf[m++]).i;
+  for (k = 0; k < h_num_improper(nlocal); k++) {
+    h_improper_type(nlocal,k) = (int) ubuf(buf[m++]).i;
+    h_improper_atom1(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom2(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom3(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+    h_improper_atom4(nlocal,k) = (tagint) ubuf(buf[m++]).i;
+  }
+  // special-neighbor counts are not stored in the record, so they start at zero
+  h_nspecial(nlocal,0) = h_nspecial(nlocal,1) = h_nspecial(nlocal,2) = 0;
+  // buf[0] is the record length written by pack_restart() as a plain double; the rest is fix data
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    int size = static_cast<int> (buf[0]) - m; // values remaining after the atom fields
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord
+   set other values to defaults
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::create_atom(int itype, double *coord)
+{
+  const int inew = atom->nlocal;   // index the new atom will occupy
+  if (inew == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  // tag 0, requested type, mask 1, every image field set to IMGMAX
+  tag[inew] = 0;
+  type[inew] = itype;
+  h_mask(inew) = 1;
+  h_image(inew) = ((imageint) IMGMAX << IMG2BITS) |
+    ((imageint) IMGMAX << IMGBITS) | IMGMAX;
+
+  // position from the caller, zero velocity
+  for (int d = 0; d < 3; d++) {
+    h_x(inew,d) = coord[d];
+    h_v(inew,d) = 0.0;
+  }
+
+  // molecule ID 0, empty topology lists, no special neighbors
+  h_molecule(inew) = 0;
+  h_num_bond(inew) = h_num_angle(inew) = 0;
+  h_num_dihedral(inew) = h_num_improper(inew) = 0;
+  h_nspecial(inew,0) = h_nspecial(inew,1) = h_nspecial(inew,2) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   initialize other atom quantities
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::data_atom(double *coord, imageint imagetmp,
+                                       char **values)
+{
+  const int inew = atom->nlocal;
+  if (inew == nmax) grow(0);
+
+  // values[0..2] = atom ID, molecule ID, atom type; each is validated after parsing
+  h_tag(inew) = atoi(values[0]);
+  if (!(h_tag(inew) > 0))
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  h_molecule(inew) = atoi(values[1]);
+  if (!(h_molecule(inew) > 0))
+    error->one(FLERR,"Invalid molecule ID in Atoms section of data file");
+
+  h_type(inew) = atoi(values[2]);
+  if (h_type(inew) < 1 || atom->ntypes < h_type(inew))
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  // position and image flags are supplied by the caller
+  for (int d = 0; d < 3; d++) h_x(inew,d) = coord[d];
+
+  h_image(inew) = imagetmp;
+
+  // remaining quantities get defaults: mask 1, zero velocity, empty topology lists
+  h_mask(inew) = 1;
+  for (int d = 0; d < 3; d++)
+    h_v(inew,d) = 0.0;
+  h_num_bond(inew) = 0;
+  h_num_angle(inew) = 0;
+  h_num_dihedral(inew) = 0;
+  h_num_improper(inew) = 0;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack hybrid quantities from one line in Atoms section of data file
+   initialize other atom quantities for this sub-style
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::data_atom_hybrid(int nlocal, char **values)
+{
+  // topology lists of atom nlocal start out empty
+  h_num_bond(nlocal) = h_num_angle(nlocal) = 0;
+  h_num_dihedral(nlocal) = h_num_improper(nlocal) = 0;
+  // the only value parsed for this sub-style is the molecule ID
+  h_molecule(nlocal) = atoi(values[0]);
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::pack_data(double **buf)
+{
+  const int natoms = atom->nlocal;
+  for (int ia = 0; ia < natoms; ia++) {
+    double *row = buf[ia];
+    row[0] = h_tag(ia);
+    row[1] = h_molecule(ia);
+    row[2] = h_type(ia);
+    for (int d = 0; d < 3; d++) row[3+d] = h_x(ia,d);
+    // split the packed image word into three fields, each shifted down by IMGMAX
+    row[6] = (h_image[ia] & IMGMASK) - IMGMAX;
+    row[7] = (h_image[ia] >> IMGBITS & IMGMASK) - IMGMAX;
+    row[8] = (h_image[ia] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack hybrid atom info for data file
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::pack_data_hybrid(int i, double *buf)
+{
+  *buf = h_molecule(i);   // the single value packed here is atom i's molecule ID
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+------------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::write_data(FILE *fp, int n, double **buf)
+{ // one line per row of buf: tag, molecule, type, x, y, z, three image flags
+  for (int i = 0; i < n; i++) // tag and molecule IDs are tagint, so print them with TAGINT_FORMAT
+    fprintf(fp,TAGINT_FORMAT " " TAGINT_FORMAT " %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            (tagint) buf[i][0],(tagint) buf[i][1], (int) buf[i][2],
+            buf[i][3],buf[i][4],buf[i][5],
+            (int) buf[i][6],(int) buf[i][7],(int) buf[i][8]);
+}
+
+/* ----------------------------------------------------------------------
+   write hybrid atom info to data file
+------------------------------------------------------------------------- */
+
+int AtomVecMolecularKokkos::write_data_hybrid(FILE *fp, double *buf)
+{
+  fprintf(fp," " TAGINT_FORMAT, (tagint) (*buf));   // space-prefixed ID taken from buf[0]
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+------------------------------------------------------------------------- */
+
+bigint AtomVecMolecularKokkos::memory_usage()
+{
+  bigint total = 0;
+  // basic per-atom arrays; f is counted with nmax*commKK->nthreads rows
+  if (atom->memcheck("tag")) total += memory->usage(tag,nmax);
+  if (atom->memcheck("type")) total += memory->usage(type,nmax);
+  if (atom->memcheck("mask")) total += memory->usage(mask,nmax);
+  if (atom->memcheck("image")) total += memory->usage(image,nmax);
+  if (atom->memcheck("x")) total += memory->usage(x,nmax,3);
+  if (atom->memcheck("v")) total += memory->usage(v,nmax,3);
+  if (atom->memcheck("f")) total += memory->usage(f,nmax*commKK->nthreads,3);
+  // molecule IDs and special-neighbor lists
+  if (atom->memcheck("molecule")) total += memory->usage(molecule,nmax);
+  if (atom->memcheck("nspecial")) total += memory->usage(nspecial,nmax,3);
+  if (atom->memcheck("special"))
+    total += memory->usage(special,nmax,atom->maxspecial);
+  // bonds
+  if (atom->memcheck("num_bond")) total += memory->usage(num_bond,nmax);
+  if (atom->memcheck("bond_type"))
+    total += memory->usage(bond_type,nmax,atom->bond_per_atom);
+  if (atom->memcheck("bond_atom"))
+    total += memory->usage(bond_atom,nmax,atom->bond_per_atom);
+  // angles
+  if (atom->memcheck("num_angle")) total += memory->usage(num_angle,nmax);
+  if (atom->memcheck("angle_type"))
+    total += memory->usage(angle_type,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom1"))
+    total += memory->usage(angle_atom1,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom2"))
+    total += memory->usage(angle_atom2,nmax,atom->angle_per_atom);
+  if (atom->memcheck("angle_atom3"))
+    total += memory->usage(angle_atom3,nmax,atom->angle_per_atom);
+  // dihedrals, then impropers
+  if (atom->memcheck("num_dihedral")) total += memory->usage(num_dihedral,nmax);
+  if (atom->memcheck("dihedral_type"))
+    total += memory->usage(dihedral_type,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom1"))
+    total += memory->usage(dihedral_atom1,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom2"))
+    total += memory->usage(dihedral_atom2,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom3"))
+    total += memory->usage(dihedral_atom3,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("dihedral_atom4"))
+    total += memory->usage(dihedral_atom4,nmax,atom->dihedral_per_atom);
+  if (atom->memcheck("num_improper")) total += memory->usage(num_improper,nmax);
+  if (atom->memcheck("improper_type"))
+    total += memory->usage(improper_type,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom1"))
+    total += memory->usage(improper_atom1,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom2"))
+    total += memory->usage(improper_atom2,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom3"))
+    total += memory->usage(improper_atom3,nmax,atom->improper_per_atom);
+  if (atom->memcheck("improper_atom4"))
+    total += memory->usage(improper_atom4,nmax,atom->improper_per_atom);
+
+  return total;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask)
+{ // sync each atomKK dual view whose bit is set in mask: device side if space == Device, else host side
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) { // group bits are tested bitwise so unrequested groups are left alone
+      atomKK->k_nspecial.sync<LMPDeviceType>();
+      atomKK->k_special.sync<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPDeviceType>();
+      atomKK->k_bond_type.sync<LMPDeviceType>();
+      atomKK->k_bond_atom.sync<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPDeviceType>();
+      atomKK->k_angle_type.sync<LMPDeviceType>();
+      atomKK->k_angle_atom1.sync<LMPDeviceType>();
+      atomKK->k_angle_atom2.sync<LMPDeviceType>();
+      atomKK->k_angle_atom3.sync<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPDeviceType>();
+      atomKK->k_dihedral_type.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.sync<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.sync<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPDeviceType>();
+      atomKK->k_improper_type.sync<LMPDeviceType>();
+      atomKK->k_improper_atom1.sync<LMPDeviceType>();
+      atomKK->k_improper_atom2.sync<LMPDeviceType>();
+      atomKK->k_improper_atom3.sync<LMPDeviceType>();
+      atomKK->k_improper_atom4.sync<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.sync<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.sync<LMPHostType>();
+      atomKK->k_special.sync<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.sync<LMPHostType>();
+      atomKK->k_bond_type.sync<LMPHostType>();
+      atomKK->k_bond_atom.sync<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.sync<LMPHostType>();
+      atomKK->k_angle_type.sync<LMPHostType>();
+      atomKK->k_angle_atom1.sync<LMPHostType>();
+      atomKK->k_angle_atom2.sync<LMPHostType>();
+      atomKK->k_angle_atom3.sync<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.sync<LMPHostType>();
+      atomKK->k_dihedral_type.sync<LMPHostType>();
+      atomKK->k_dihedral_atom1.sync<LMPHostType>();
+      atomKK->k_dihedral_atom2.sync<LMPHostType>();
+      atomKK->k_dihedral_atom3.sync<LMPHostType>();
+      atomKK->k_dihedral_atom4.sync<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.sync<LMPHostType>();
+      atomKK->k_improper_type.sync<LMPHostType>();
+      atomKK->k_improper_atom1.sync<LMPHostType>();
+      atomKK->k_improper_atom2.sync<LMPHostType>();
+      atomKK->k_improper_atom3.sync<LMPHostType>();
+      atomKK->k_improper_atom4.sync<LMPHostType>();
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask)
+{ // mark each atomKK dual view whose bit is set in mask as modified: device side if space == Device, else host side
+  if (space == Device) {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPDeviceType>();
+    if (mask & SPECIAL_MASK) { // group bits are tested bitwise so unrequested groups are left alone
+      atomKK->k_nspecial.modify<LMPDeviceType>();
+      atomKK->k_special.modify<LMPDeviceType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPDeviceType>();
+      atomKK->k_bond_type.modify<LMPDeviceType>();
+      atomKK->k_bond_atom.modify<LMPDeviceType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPDeviceType>();
+      atomKK->k_angle_type.modify<LMPDeviceType>();
+      atomKK->k_angle_atom1.modify<LMPDeviceType>();
+      atomKK->k_angle_atom2.modify<LMPDeviceType>();
+      atomKK->k_angle_atom3.modify<LMPDeviceType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPDeviceType>();
+      atomKK->k_dihedral_type.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom1.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom2.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom3.modify<LMPDeviceType>();
+      atomKK->k_dihedral_atom4.modify<LMPDeviceType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPDeviceType>();
+      atomKK->k_improper_type.modify<LMPDeviceType>();
+      atomKK->k_improper_atom1.modify<LMPDeviceType>();
+      atomKK->k_improper_atom2.modify<LMPDeviceType>();
+      atomKK->k_improper_atom3.modify<LMPDeviceType>();
+      atomKK->k_improper_atom4.modify<LMPDeviceType>();
+    }
+  } else {
+    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
+    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
+    if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>();
+    if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>();
+    if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
+    if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
+    if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
+    if (mask & MOLECULE_MASK) atomKK->k_molecule.modify<LMPHostType>();
+    if (mask & SPECIAL_MASK) {
+      atomKK->k_nspecial.modify<LMPHostType>();
+      atomKK->k_special.modify<LMPHostType>();
+    }
+    if (mask & BOND_MASK) {
+      atomKK->k_num_bond.modify<LMPHostType>();
+      atomKK->k_bond_type.modify<LMPHostType>();
+      atomKK->k_bond_atom.modify<LMPHostType>();
+    }
+    if (mask & ANGLE_MASK) {
+      atomKK->k_num_angle.modify<LMPHostType>();
+      atomKK->k_angle_type.modify<LMPHostType>();
+      atomKK->k_angle_atom1.modify<LMPHostType>();
+      atomKK->k_angle_atom2.modify<LMPHostType>();
+      atomKK->k_angle_atom3.modify<LMPHostType>();
+    }
+    if (mask & DIHEDRAL_MASK) {
+      atomKK->k_num_dihedral.modify<LMPHostType>();
+      atomKK->k_dihedral_type.modify<LMPHostType>();
+      atomKK->k_dihedral_atom1.modify<LMPHostType>();
+      atomKK->k_dihedral_atom2.modify<LMPHostType>();
+      atomKK->k_dihedral_atom3.modify<LMPHostType>();
+      atomKK->k_dihedral_atom4.modify<LMPHostType>();
+    }
+    if (mask & IMPROPER_MASK) {
+      atomKK->k_num_improper.modify<LMPHostType>();
+      atomKK->k_improper_type.modify<LMPHostType>();
+      atomKK->k_improper_atom1.modify<LMPHostType>();
+      atomKK->k_improper_atom2.modify<LMPHostType>();
+      atomKK->k_improper_atom3.modify<LMPHostType>();
+      atomKK->k_improper_atom4.modify<LMPHostType>();
+    }
+  }
+}
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..24093beefdc75646d4fd2a7ee5128ca91498839c
--- /dev/null
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.h
@@ -0,0 +1,178 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(molecular/kk,AtomVecMolecularKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
+#define LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecMolecularKokkos : public AtomVecKokkos {  // atom style registered as "molecular/kk" via AtomStyle() above
+ public:
+  AtomVecMolecularKokkos(class LAMMPS *);
+  virtual ~AtomVecMolecularKokkos() {}  // empty body: nothing is released explicitly here
+  void grow(int);  // from here to memory_usage(): interface working on plain int*/double* buffers
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);
+  int pack_comm_vel(int, int *, double *, int, int *);
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);
+  void unpack_reverse(int, int *, double *);
+  int pack_border(int, int *, double *, int, int *);
+  int pack_border_vel(int, int *, double *, int, int *);
+  int pack_border_hybrid(int, int *, double *);
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int unpack_border_hybrid(int, int, double *);
+  int pack_exchange(int, double *);
+  int unpack_exchange(double *);
+  int size_restart();
+  int pack_restart(int, double *);
+  int unpack_restart(double *);
+  void create_atom(int, double *);
+  void data_atom(double *, tagint, char **);
+  int data_atom_hybrid(int, char **);
+  void pack_data(double **);
+  int pack_data_hybrid(int, double *);
+  void write_data(FILE *, int, double **);
+  int write_data_hybrid(FILE *, double *);
+  bigint memory_usage();
+  // Kokkos-aware variants: send lists and buffers are passed as DAT:: dual views instead of raw pointers
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst,
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                     const int & iswap, const int nfirst,
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst,
+                            const DAT::tdual_xfloat_2d &buf,
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim,
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+  // sync()/modified() take an execution space and a mask; NOTE(review): mask is presumably an OR of the *_MASK flags - confirm in the .cpp
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  // raw per-atom array pointers; NOTE(review): presumably aliases of the Atom class arrays - confirm in the .cpp
+  tagint *tag;
+  int *type,*mask;
+  imageint *image;
+  double **x,**v,**f;
+  // molecular topology: molecule ID, special lists, bonds
+  tagint *molecule;
+  int **nspecial;
+  tagint **special;
+  int *num_bond;
+  int **bond_type;
+  tagint **bond_atom;
+  // angles: count, type and the three atom IDs
+  int *num_angle;
+  int **angle_type;
+  tagint **angle_atom1,**angle_atom2,**angle_atom3;
+  // dihedrals and impropers: count, type and the four atom IDs each
+  int *num_dihedral;
+  int **dihedral_type;
+  tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
+  int *num_improper;
+  int **improper_type;
+  tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
+  // below: d_* members use DAT:: view types, h_* members use HAT:: view types; NOTE(review): presumably device/host views of the fields above - confirm
+  DAT::t_tagint_1d d_tag;
+  DAT::t_int_1d d_type, d_mask;
+  HAT::t_tagint_1d h_tag;
+  HAT::t_int_1d h_type, h_mask;
+
+  DAT::t_imageint_1d d_image;
+  HAT::t_imageint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::t_tagint_1d d_molecule;
+  DAT::t_int_2d d_nspecial;
+  DAT::t_tagint_2d d_special;
+  DAT::t_int_1d d_num_bond;
+  DAT::t_int_2d d_bond_type;
+  DAT::t_tagint_2d d_bond_atom;
+
+  HAT::t_tagint_1d h_molecule;
+  HAT::t_int_2d h_nspecial;
+  HAT::t_tagint_2d h_special;
+  HAT::t_int_1d h_num_bond;
+  HAT::t_int_2d h_bond_type;
+  HAT::t_tagint_2d h_bond_atom;
+
+  DAT::t_int_1d d_num_angle;
+  DAT::t_int_2d d_angle_type;
+  DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
+
+  HAT::t_int_1d h_num_angle;
+  HAT::t_int_2d h_angle_type;
+  HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
+
+  DAT::t_int_1d d_num_dihedral;
+  DAT::t_int_2d d_dihedral_type;
+  DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
+    d_dihedral_atom3,d_dihedral_atom4;
+  DAT::t_int_1d d_num_improper;
+  DAT::t_int_2d d_improper_type;
+  DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
+    d_improper_atom3,d_improper_atom4;
+
+  HAT::t_int_1d h_num_dihedral;
+  HAT::t_int_2d h_dihedral_type;
+  HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
+    h_dihedral_atom3,h_dihedral_atom4;
+  HAT::t_int_1d h_num_improper;
+  HAT::t_int_2d h_improper_type;
+  HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
+    h_improper_atom3,h_improper_atom4;
+
+  HAT::tdual_int_1d k_count;  // dual view; how it is used is not visible in this header
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index 5211d11a0286da6fd9afeb2f8228557440a9b6f6..1319503c1e87e39c5e46acadc8fec0914854c1c9 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -21,6 +21,13 @@
 #include "atom_masks.h"
 #include "error.h"
 #include "memory.h"
+#include "force.h"
+#include "pair.h"
+#include "fix.h"
+#include "compute.h"
+#include "dump.h"
+#include "output.h"
+#include "modify.h"
 
 using namespace LAMMPS_NS;
 
@@ -43,15 +50,19 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
 
   // initialize comm buffers & exchange memory
 
-  maxsend = BUFMIN;
-  k_buf_send = ArrayTypes<LMPDeviceType>::
-    tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
-  buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
+  // maxsend = BUFMIN;
+  // k_buf_send = ArrayTypes<LMPDeviceType>::
+  //   tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
+  // buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
+  maxsend = 0;
+  buf_send = NULL;
 
-  maxrecv = BUFMIN;
-  k_buf_recv = ArrayTypes<LMPDeviceType>::
-    tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
-  buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
+  // maxrecv = BUFMIN;
+  // k_buf_recv = ArrayTypes<LMPDeviceType>::
+  //   tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
+  // buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
+  maxrecv = 0;
+  buf_recv = NULL;
 
   k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
     tdual_int_1d("comm:k_exchange_sendlist",100);
@@ -89,6 +100,34 @@ void CommKokkos::init()
   forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
 
   CommBrick::init();
+
+  int check_forward = 0;
+  int check_reverse = 0;
+  if (force->pair)
+    check_forward += force->pair->comm_forward;
+  if (force->pair)
+    check_reverse += force->pair->comm_reverse;
+
+  for (int i = 0; i < modify->nfix; i++) {
+    check_forward += modify->fix[i]->comm_forward;
+    check_reverse += modify->fix[i]->comm_reverse;
+  }
+
+  for (int i = 0; i < modify->ncompute; i++) {
+    check_forward += modify->compute[i]->comm_forward;
+    check_reverse += modify->compute[i]->comm_reverse;
+  }
+
+  for (int i = 0; i < output->ndump; i++) {
+    check_forward += output->dump[i]->comm_forward;
+    check_reverse += output->dump[i]->comm_reverse;
+  }
+
+  if (force->newton == 0) check_reverse = 0;
+  if (force->pair) check_reverse += force->pair->comm_reverse_off;
+
+  if(check_reverse || check_forward)
+    forward_comm_classic = true;
 }
 
 /* ----------------------------------------------------------------------
@@ -98,8 +137,7 @@ void CommKokkos::init()
 
 void CommKokkos::forward_comm(int dummy)
 {
-
- if (!forward_comm_classic) {
+  if (!forward_comm_classic) {
     if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
     else forward_comm_device<LMPDeviceType>(dummy);
     return;
@@ -205,6 +243,68 @@ void CommKokkos::forward_comm_device(int dummy)
     }
   }
 }
+void CommKokkos::reverse_comm()  // wraps CommBrick::reverse_comm(): sync to Host, delegate, mark Host data modified, sync back to Device
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before the CommBrick call (presumably CommBrick reads it on the host)
+  if (comm_f_only)  // when comm_f_only is set, only the F_MASK field is synced to the host
+    atomKK->sync(Host,F_MASK);
+  else
+    atomKK->sync(Host,ALL_MASK);
+  CommBrick::reverse_comm();
+  if (comm_f_only)  // flag as host-modified the same mask that was synced above
+    atomKK->modified(Host,F_MASK);
+  else
+    atomKK->modified(Host,ALL_MASK);
+  atomKK->sync(Device,ALL_MASK);  // always syncs ALL_MASK back to the device, whichever branch ran
+}
+
+void CommKokkos::forward_comm_fix(Fix *fix)  // forward comm on behalf of a Fix, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::forward_comm_fix(fix);
+}
+
+void CommKokkos::reverse_comm_fix(Fix *fix)  // reverse comm on behalf of a Fix, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::reverse_comm_fix(fix);
+}
+
+void CommKokkos::forward_comm_compute(Compute *compute)  // forward comm on behalf of a Compute, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::forward_comm_compute(compute);
+}
+
+void CommKokkos::reverse_comm_compute(Compute *compute)  // reverse comm on behalf of a Compute, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::reverse_comm_compute(compute);
+}
+
+void CommKokkos::forward_comm_pair(Pair *pair)  // forward comm on behalf of a Pair, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::forward_comm_pair(pair);
+}
+
+void CommKokkos::reverse_comm_pair(Pair *pair)  // reverse comm on behalf of a Pair, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::reverse_comm_pair(pair);
+}
+
+void CommKokkos::forward_comm_dump(Dump *dump)  // forward comm on behalf of a Dump, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::forward_comm_dump(dump);
+}
+
+void CommKokkos::reverse_comm_dump(Dump *dump)  // reverse comm on behalf of a Dump, delegated to CommBrick
+{
+  k_sendlist.sync<LMPHostType>();  // sync the host side of the send list before delegating
+  CommBrick::reverse_comm_dump(dump);
+}
 
 /* ----------------------------------------------------------------------
    exchange: move atoms to correct processors
@@ -219,6 +319,16 @@ void CommKokkos::forward_comm_device(int dummy)
 
 void CommKokkos::exchange()
 {
+  if(atom->nextra_grow + atom->nextra_border) {
+    if(!exchange_comm_classic) {
+      static int print = 1;
+      if(print) {
+        error->warning(FLERR,"Kokkos communication does not currently support fixes sending data. Switching to classic communication.");
+        print = 0;
+      }
+      exchange_comm_classic = true;
+    }
+  }
   if (!exchange_comm_classic) {
     if (exchange_comm_on_host) exchange_device<LMPHostType>();
     else exchange_device<LMPDeviceType>();
@@ -463,10 +573,12 @@ void CommKokkos::borders()
   }
 
   atomKK->sync(Host,ALL_MASK);
+
   k_sendlist.modify<LMPHostType>();
   atomKK->modified(Host,ALL_MASK);
-
   CommBrick::borders();
+  k_sendlist.modify<LMPHostType>();
+  atomKK->modified(Host,ALL_MASK);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -496,7 +608,7 @@ struct BuildBorderListFunctor {
 
 
   KOKKOS_INLINE_FUNCTION
-  void operator() (DeviceType dev) const {
+  void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
     const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) / 
                        dev.league_size());
     const int teamstart = chunk*dev.league_rank() + nfirst;
@@ -517,7 +629,7 @@ struct BuildBorderListFunctor {
     }
   }
 
-  size_t shmem_size() const { return 1000u;}
+  size_t shmem_size(const int team_size) const { (void) team_size; return 1000u;}
 };
 
 /* ---------------------------------------------------------------------- */
@@ -591,16 +703,19 @@ void CommKokkos::borders_device() {
               total_send.template modify<DeviceType>();
               total_send.template sync<LMPDeviceType>();
             }
+
             BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                 total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
-            Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
+            Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
             Kokkos::parallel_for(config,f);
             DeviceType::fence();
+
             total_send.template modify<DeviceType>();
             total_send.template sync<LMPHostType>();
 
             if(total_send.h_view(0) >= maxsendlist[iswap]) {
               grow_list(iswap,total_send.h_view(0));
+              k_sendlist.modify<DeviceType>();
               total_send.h_view(0) = 0;
               if(exec_space == Device) {
                 total_send.template modify<LMPHostType>();
@@ -608,7 +723,7 @@ void CommKokkos::borders_device() {
               }
               BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                   total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
-              Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
+              Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
               Kokkos::parallel_for(config,f);
               DeviceType::fence();
               total_send.template modify<DeviceType>();
@@ -742,6 +857,25 @@ void CommKokkos::borders_device() {
   atomKK->modified(exec_space,ALL_MASK);
   DeviceType::fence();
 }
+/* ----------------------------------------------------------------------
+   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
+   if flag = 1, realloc
+   if flag = 0, don't need to realloc with copy, just free/malloc
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_send(int n, int flag)  // two-argument entry point; n and flag are passed through unchanged
+{
+  grow_send_kokkos(n,flag,Host);  // forward to the Kokkos variant, explicitly selecting the Host execution space
+}
+
+/* ----------------------------------------------------------------------
+   free/malloc the size of the recv buffer as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_recv(int n)  // one-argument entry point; n is passed through unchanged
+{
+  grow_recv_kokkos(n,Host);  // forward to the Kokkos variant, explicitly selecting the Host execution space
+}
 
 /* ----------------------------------------------------------------------
    realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h
index 46d3552d2d5464798b6e3c09cfe343573340ef84..539156af09ecdc0cfbc88811d2581b77fb561c55 100644
--- a/src/KOKKOS/comm_kokkos.h
+++ b/src/KOKKOS/comm_kokkos.h
@@ -33,9 +33,19 @@ class CommKokkos : public CommBrick {
   void init();
 
   void forward_comm(int dummy = 0);    // forward comm of atom coords
+  void reverse_comm();              // reverse comm of forces
   void exchange();                     // move atoms to new procs
   void borders();                      // setup list of atoms to comm
 
+  void forward_comm_pair(class Pair *);    // forward comm from a Pair
+  void reverse_comm_pair(class Pair *);    // reverse comm from a Pair
+  void forward_comm_fix(class Fix *);      // forward comm from a Fix
+  void reverse_comm_fix(class Fix *);      // reverse comm from a Fix
+  void forward_comm_compute(class Compute *);  // forward from a Compute
+  void reverse_comm_compute(class Compute *);  // reverse from a Compute
+  void forward_comm_dump(class Dump *);    // forward comm from a Dump
+  void reverse_comm_dump(class Dump *);    // reverse comm from a Dump
+
   template<class DeviceType> void forward_comm_device(int dummy);
   template<class DeviceType> void exchange_device();
   template<class DeviceType> void borders_device();
@@ -48,6 +58,8 @@ class CommKokkos : public CommBrick {
   //double *buf_send;                 // send buffer for all comm
   //double *buf_recv;                 // recv buffer for all comm
 
+  void grow_send(int, int);
+  void grow_recv(int);
   void grow_send_kokkos(int, int, ExecutionSpace space = Host);
   void grow_recv_kokkos(int, ExecutionSpace space = Host);
   void grow_list(int, int);
diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd34d3a9648c6794c8b8cac2239c5fe10fcfbdab
--- /dev/null
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@@ -0,0 +1,810 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+   ------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "string.h"
+#include "fix_langevin_kokkos.h"
+#include "atom_masks.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "update.h"
+#include "respa.h"
+#include "error.h"
+#include "memory.h"
+#include "group.h"
+#include "random_mars.h"
+#include "compute.h"
+#include "comm.h"
+#include "modify.h"
+#include "input.h"
+#include "variable.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{NOBIAS,BIAS};
+enum{CONSTANT,EQUAL,ATOM};
+#define SINERTIA 0.4          // moment of inertia prefactor for sphere
+#define EINERTIA 0.2          // moment of inertia prefactor for ellipsoid
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me)  // args go to the FixLangevin constructor; the random pool seed is offset by comm->me
+{
+  atomKK = (AtomKokkos *) atom;
+  int ntypes = atomKK->ntypes;
+
+  // allocate per-type arrays (ntypes+1 entries each) for force prefactors and ratio, then cache their device and host views
+  memory->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1");
+  memory->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2");
+  memory->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio");
+  d_gfactor1 = k_gfactor1.template view<DeviceType>();
+  h_gfactor1 = k_gfactor1.template view<LMPHostType>();
+  d_gfactor2 = k_gfactor2.template view<DeviceType>();
+  h_gfactor2 = k_gfactor2.template view<LMPHostType>();
+  d_ratio = k_ratio.template view<DeviceType>();
+  h_ratio = k_ratio.template view<LMPHostType>();
+
+  // optional args: start from a ratio of 1.0 for every type 1..ntypes (index 0 is not written)
+  for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;  // NOTE(review): overwrites any ratio the base constructor may have set - confirm this is intended
+  k_ratio.template modify<LMPHostType>();  // ratio was written through the host pointer
+
+  if(gjfflag){  // franprev storage (3 values per atom) is only set up when gjfflag is set
+    nvalues = 3;
+    grow_arrays(atomKK->nmax);
+    atom->add_callback(0);  // NOTE(review): presumably registers this fix for per-atom grow callbacks - confirm against Atom::add_callback
+    // initialize franprev to zero
+    for (int i = 0; i < atomKK->nlocal; i++) {
+      franprev[i][0] = 0.0;
+      franprev[i][1] = 0.0;
+      franprev[i][2] = 0.0;
+    }
+    k_franprev.template modify<LMPHostType>();  // the zeros above were written on the host
+  }
+  if(zeroflag){  // the fsumall dual view and its host/device views are only created when zeroflag is set
+    k_fsumall = tdual_double_1d_3n("langevin:fsumall");
+    h_fsumall = k_fsumall.template view<LMPHostType>();
+    d_fsumall = k_fsumall.template view<DeviceType>();
+  }
+
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read =  V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;  // post_force() syncs these fields to execution_space before using them
+  datamask_modify = F_MASK;
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()  // tears down the Kokkos dual views together with their raw-pointer aliases
+{
+  memory->destroy_kokkos(k_gfactor1,gfactor1);
+  memory->destroy_kokkos(k_gfactor2,gfactor2);
+  memory->destroy_kokkos(k_ratio,ratio);
+  memory->destroy_kokkos(k_flangevin,flangevin);  // destroyed unconditionally, although post_force() only creates it when tallyflag is set
+  if(gjfflag) memory->destroy_kokkos(k_franprev,franprev);  // franprev is only allocated in the constructor when gjfflag is set
+  memory->destroy_kokkos(k_tforce,tforce);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::init()  // runs FixLangevin::init(), then rejects options this Kokkos version does not implement
+{
+  FixLangevin::init();
+  if(oflag)  // omega option: stop with an error
+    error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
+  if(ascale)  // angmom option: stop with an error
+    error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
+
+  // prefactors are modified in the init, so flag the host side of both gfactor dual views as modified
+  k_gfactor1.template modify<LMPHostType>();
+  k_gfactor2.template modify<LMPHostType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax)  // resize franprev to nmax rows of 3 values
+{
+  memory->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev");
+  d_franprev = k_franprev.template view<DeviceType>();  // re-fetch both cached views after the grow (the dual view may have been reallocated)
+  h_franprev = k_franprev.template view<LMPHostType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::post_force(int vflag)
+{
+  // sync the device views which might have been modified on host
+  atomKK->sync(execution_space,datamask_read);
+  rmass = atomKK->rmass;
+  f = atomKK->k_f.template view<DeviceType>();
+  v = atomKK->k_v.template view<DeviceType>();
+  type = atomKK->k_type.template view<DeviceType>();
+  mask = atomKK->k_mask.template view<DeviceType>();
+
+  k_gfactor1.template sync<DeviceType>();
+  k_gfactor2.template sync<DeviceType>();
+  k_ratio.template sync<DeviceType>();
+  if(gjfflag) k_franprev.template sync<DeviceType>();
+
+  boltz = force->boltz;
+  dt = update->dt;
+  mvv2e = force->mvv2e;
+  ftm2v = force->ftm2v;
+  fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e);
+
+  compute_target(); // modifies tforce vector, hence sync here
+  k_tforce.template sync<DeviceType>();
+
+  double fsum[3],fsumall[3];
+  bigint count;
+  int nlocal = atomKK->nlocal;
+
+  if (zeroflag) {
+    fsum[0] = fsum[1] = fsum[2] = 0.0;
+    count = group->count(igroup);
+    if (count == 0)
+      error->all(FLERR,"Cannot zero Langevin force of 0 atoms");
+  }
+
+  // reallocate flangevin if necessary
+  if (tallyflag) {
+    if (nlocal > maxatom1) {
+      memory->destroy_kokkos(k_flangevin,flangevin);
+      maxatom1 = atomKK->nmax;
+      memory->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin");
+      d_flangevin = k_flangevin.template view<DeviceType>();
+      h_flangevin = k_flangevin.template view<LMPHostType>();
+    }
+  }
+
+  // account for bias velocity
+  if(tbiasflag == BIAS){
+    temperature->compute_scalar();
+    temperature->remove_bias_all(); // modifies velocities
+    // if the temperature compute is kokkosized, host-device comm won't be needed
+    atomKK->modified(Host,V_MASK);
+    atomKK->sync(execution_space,V_MASK);
+  }
+
+  // compute langevin force in parallel on the device
+  FSUM s_fsum;
+  if (tstyle == ATOM)
+    if (gjfflag)
+      if (tallyflag)
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else{
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else{
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+      else
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+    else
+      if (tallyflag)
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+      else
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+  else
+    if (gjfflag)
+      if (tallyflag)
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+      else
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+    else
+      if (tallyflag)
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+      else
+        if (tbiasflag == BIAS)
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+        else
+          if (rmass)
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+          else
+            if (zeroflag) {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,1> post_functor(this);
+              Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
+            }
+            else {
+              FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,0> post_functor(this);
+              Kokkos::parallel_for(nlocal,post_functor);
+            }
+
+  DeviceType::fence();
+
+  if(tbiasflag == BIAS){
+    temperature->restore_bias_all(); // modifies velocities
+    atomKK->modified(Host,V_MASK);
+  }
+
+  // set modify flags for the views modified in post_force functor
+  if (gjfflag) k_franprev.template modify<DeviceType>();
+  if (tallyflag) k_flangevin.template modify<DeviceType>();
+
+  // set total force to zero
+  if (zeroflag) {
+    fsum[0] = s_fsum.fx; fsum[1] = s_fsum.fy; fsum[2] = s_fsum.fz;
+    MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world);
+    h_fsumall(0) = fsumall[0]/count;
+    h_fsumall(1) = fsumall[1]/count;
+    h_fsumall(2) = fsumall[2]/count;
+    k_fsumall.template modify<LMPHostType>();
+    k_fsumall.template sync<DeviceType>();
+    // set total force zero in parallel on the device
+    FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
+    Kokkos::parallel_for(nlocal,zero_functor);
+    DeviceType::fence();
+  }
+  // f is modified by both post_force and zero_force functors
+  atomKK->modified(execution_space,datamask_modify);
+
+  // thermostat omega and angmom
+  //  if (oflag) omega_thermostat();
+  //  if (ascale) angmom_thermostat();
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
+         int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
+KOKKOS_INLINE_FUNCTION
+FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
+{ // Per-atom kernel: adds drag + random force to f(i,:) for atoms in the fix group; the Tp_* flags select code paths at compile time.
+  FSUM fsum; // zeroed by the FSUM constructor; returned unchanged for atoms outside the group or when Tp_ZERO is 0
+  double fdrag[3],fran[3];
+  double gamma1,gamma2;
+  double fswap;
+  double tsqrt_t = tsqrt; // default scale; replaced per atom below when Tp_TSTYLEATOM is set
+
+  if (mask[i] & groupbit) {
+    rand_type rand_gen = rand_pool.get_state(); // borrow a generator from the pool; handed back by free_state() at the end of this branch
+    if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
+    if(Tp_RMASS){ // per-atom mass: build both coefficients from rmass[i] and the per-type ratio
+      gamma1 = -rmass[i] / t_period / ftm2v;
+      gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
+      gamma1 *= 1.0/d_ratio[type[i]];
+      gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t;
+    } else { // per-type mass: coefficients come from the per-type tables
+      gamma1 = d_gfactor1[type[i]];
+      gamma2 = d_gfactor2[type[i]] * tsqrt_t;
+    }
+
+    fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
+    fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
+    fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
+
+    if(Tp_BIAS){ // same drag as the else-branch; additionally drops the random force on components whose velocity is exactly 0
+      fdrag[0] = gamma1*v(i,0);
+      fdrag[1] = gamma1*v(i,1);
+      fdrag[2] = gamma1*v(i,2);
+      if (v(i,0) == 0.0) fran[0] = 0.0;
+      if (v(i,1) == 0.0) fran[1] = 0.0;
+      if (v(i,2) == 0.0) fran[2] = 0.0;
+    }else{
+      fdrag[0] = gamma1*v(i,0);
+      fdrag[1] = gamma1*v(i,1);
+      fdrag[2] = gamma1*v(i,2);
+    }
+
+    if (Tp_GJF) { // average the fresh random force with the stored previous one, then store the fresh one in d_franprev
+      fswap = 0.5*(fran[0]+d_franprev(i,0));
+      d_franprev(i,0) = fran[0];
+      fran[0] = fswap;
+      fswap = 0.5*(fran[1]+d_franprev(i,1));
+      d_franprev(i,1) = fran[1];
+      fran[1] = fswap;
+      fswap = 0.5*(fran[2]+d_franprev(i,2));
+      d_franprev(i,2) = fran[2];
+      fran[2] = fswap;
+
+      fdrag[0] *= gjffac;
+      fdrag[1] *= gjffac;
+      fdrag[2] *= gjffac;
+      fran[0] *= gjffac;
+      fran[1] *= gjffac;
+      fran[2] *= gjffac;
+      f(i,0) *= gjffac; // the force already present on the atom is rescaled too, before the thermostat terms are added
+      f(i,1) *= gjffac;
+      f(i,2) *= gjffac;
+    }
+
+    f(i,0) += fdrag[0] + fran[0];
+    f(i,1) += fdrag[1] + fran[1];
+    f(i,2) += fdrag[2] + fran[2];
+
+    if (Tp_TALLY) { // keep the thermostat force per atom; compute_energy_item() dots it with v
+      d_flangevin(i,0) = fdrag[0] + fran[0];
+      d_flangevin(i,1) = fdrag[1] + fran[1];
+      d_flangevin(i,2) = fdrag[2] + fran[2];
+    }
+
+    if (Tp_ZERO) { // report this atom's random force so the caller can reduce it over atoms
+      fsum.fx = fran[0];
+      fsum.fy = fran[1];
+      fsum.fz = fran[2];
+    }
+    rand_pool.free_state(rand_gen);
+  }
+
+  return fsum;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
+{ // Per-atom kernel: subtract the three d_fsumall components from the force on atom i when it belongs to the fix group.
+  if (mask[i] & groupbit) {
+    f(i,0) -= d_fsumall[0]; // presumably the device side of k_fsumall, which post_force fills with summed random force / count - confirm where d_fsumall is bound
+    f(i,1) -= d_fsumall[1];
+    f(i,2) -= d_fsumall[2];
+  }
+
+}
+
+/* ----------------------------------------------------------------------
+   set current t_target and t_sqrt
+   ------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::compute_target()
+{ // Sets t_target/tsqrt (CONSTANT or EQUAL style) or fills the per-atom tforce array (otherwise) for the current step.
+  atomKK->sync(Host, MASK_MASK); // host copy of mask must be current for the host-side check below
+  mask = atomKK->k_mask.template view<DeviceType>();
+  int nlocal = atomKK->nlocal;
+
+  double delta = update->ntimestep - update->beginstep;
+  if (delta != 0.0) delta /= update->endstep - update->beginstep; // fraction of the run elapsed; left at 0 on the first step
+
+  // if variable temp, evaluate variable, wrap with clear/add
+  // reallocate tforce array if necessary
+
+  if (tstyle == CONSTANT) {
+    t_target = t_start + delta * (t_stop-t_start);
+    tsqrt = sqrt(t_target);
+  } else {
+    modify->clearstep_compute();
+    if (tstyle == EQUAL) {
+      t_target = input->variable->compute_equal(tvar);
+      if (t_target < 0.0)
+        error->one(FLERR,"Fix langevin variable returned negative temperature");
+      tsqrt = sqrt(t_target);
+    } else {
+      if (nlocal > maxatom2) {
+        maxatom2 = atom->nmax;
+        memory->destroy_kokkos(k_tforce,tforce);
+        memory->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce");
+        d_tforce = k_tforce.template view<DeviceType>();
+        h_tforce = k_tforce.template view<LMPHostType>();
+      }
+      input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host
+      k_tforce.template modify<LMPHostType>();
+      for (int i = 0; i < nlocal; i++)
+        if (atomKK->k_mask.h_view(i) & groupbit) // this loop runs on the host: read the host view, not the DeviceType view held in `mask`
+          if (h_tforce[i] < 0.0)
+            error->one(FLERR,
+                       "Fix langevin variable returned negative temperature");
+    }
+    modify->addstep_compute(update->ntimestep + 1);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::reset_dt()
+{ // Rebuilds the per-type h_gfactor2 table from the current update->dt; does nothing when atomKK->mass is null.
+  if (atomKK->mass) {
+    for (int i = 1; i <= atomKK->ntypes; i++) { // type indices are iterated 1..ntypes inclusive
+      h_gfactor2[i] = sqrt(atomKK->mass[i]) *
+        sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
+        force->ftm2v;
+      h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]);
+    }
+    k_gfactor2.template modify<LMPHostType>(); // only flags the host side as modified; no device sync is issued in this function
+  }
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+double FixLangevinKokkos<DeviceType>::compute_scalar()
+{ // Returns the negated, MPI-summed tallied energy, shifted back by half of the latest per-step contribution.
+  if (!tallyflag || flangevin == NULL) return 0.0; // nothing has been tallied
+
+  v = atomKK->k_v.template view<DeviceType>();
+  mask = atomKK->k_mask.template view<DeviceType>();
+
+  // capture the very first energy transfer to thermal reservoir
+
+  if (update->ntimestep == update->beginstep) {
+    energy_onestep = 0.0;
+    atomKK->sync(execution_space,V_MASK | MASK_MASK);
+    int nlocal = atomKK->nlocal;
+    k_flangevin.template sync<DeviceType>();
+    FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
+    Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep); // sums compute_energy_item(i) over local atoms
+    DeviceType::fence();
+    energy = 0.5*energy_onestep*update->dt; // with this value energy_me below evaluates to zero on the first step
+  }
+
+  // convert midstep energy back to previous fullstep energy
+  double energy_me = energy - 0.5*energy_onestep*update->dt;
+  double energy_all;
+  MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+  return -energy_all; // the sum over ranks is reported with its sign flipped
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const
+{ // Per-atom kernel: returns d_flangevin(i,:) dotted with v(i,:) for atoms in the fix group, 0.0 otherwise.
+  double energy = 0.0; // must start at zero: the value is summed by parallel_reduce even for atoms outside the group
+  if (mask[i] & groupbit)
+    energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
+      d_flangevin(i,2)*v(i,2);
+  return energy;
+}
+
+/* ----------------------------------------------------------------------
+   tally energy transfer to thermal reservoir
+   ------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::end_of_step()
+{ // Adds this step's reduced flangevin-dot-v sum, times dt, to the running `energy`; no-op unless tallyflag is set.
+  if (!tallyflag) return;
+
+  v = atomKK->k_v.template view<DeviceType>();
+  mask = atomKK->k_mask.template view<DeviceType>();
+
+  atomKK->sync(execution_space,V_MASK | MASK_MASK);
+  int nlocal = atomKK->nlocal;
+
+  energy_onestep = 0.0;
+
+  k_flangevin.template sync<DeviceType>();
+  FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
+  Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); // sums compute_energy_item(i) over local atoms
+  DeviceType::fence();
+
+  energy += energy_onestep*update->dt; // local (per-rank) accumulation; compute_scalar() does the MPI sum
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based array
+   ------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
+{ // Copies the nvalues entries of row i of h_franprev onto row j (host view); delflag is not used in this body.
+  for (int m = 0; m < nvalues; m++)
+    h_franprev(j,m) = h_franprev(i,m);
+
+  k_franprev.template modify<LMPHostType>(); // flag the host side of the dual view as modified
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixLangevinKokkos<DeviceType>::cleanup_copy()
+{ // Nulls the raw-pointer members and clears gjfflag; the functors in the header call this on their by-value copy of the fix.
+  random = NULL; // presumably so the copy's destructor does not release resources still owned by the original - confirm against the destructors
+  tstr = NULL;
+  gfactor1 = NULL;
+  gfactor2 = NULL;
+  ratio = NULL;
+  id_temp = NULL;
+  flangevin = NULL;
+  tforce = NULL;
+  gjfflag = 0;
+  franprev = NULL;
+  id = style = NULL;
+  vatom = NULL;
+}
+
+template class FixLangevinKokkos<LMPDeviceType>; // explicit instantiation for the device type
+#ifdef KOKKOS_HAVE_CUDA
+template class FixLangevinKokkos<LMPHostType>; // host instantiation is emitted only in CUDA builds; presumably the two types coincide otherwise - confirm
+#endif
diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..48a7d5ec576260565c40e3857107d34da6ae06c5
--- /dev/null
+++ b/src/KOKKOS/fix_langevin_kokkos.h
@@ -0,0 +1,266 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+   ------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(langevin/kk,FixLangevinKokkos<LMPDeviceType>)
+FixStyle(langevin/kk/device,FixLangevinKokkos<LMPDeviceType>)
+FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_LANGEVIN_KOKKOS_H
+#define LMP_FIX_LANGEVIN_KOKKOS_H
+
+#include "fix_langevin.h"
+#include "kokkos_type.h"
+#include "Kokkos_Random.hpp"
+#include "comm_kokkos.h"
+
+namespace LAMMPS_NS {
+
+  struct s_FSUM { // three-component sum used as the reduction value of the post-force functor
+    double fx, fy, fz;
+    KOKKOS_INLINE_FUNCTION
+    s_FSUM() {
+      fx = fy = fz = 0.0; // a default-constructed value is the additive identity
+    }
+    KOKKOS_INLINE_FUNCTION
+    s_FSUM& operator+=(const s_FSUM &rhs){ // component-wise accumulation
+      fx += rhs.fx;
+      fy += rhs.fy;
+      fz += rhs.fz;
+      return *this;
+    }
+  };
+  typedef s_FSUM FSUM;
+
+  template<class DeviceType>
+    class FixLangevinKokkos;
+
+  template<class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
+    int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
+    class FixLangevinKokkosPostForceFunctor;
+
+  template<class DeviceType> class FixLangevinKokkosZeroForceFunctor;
+
+  template<class DeviceType> class FixLangevinKokkosTallyEnergyFunctor;
+
+  template<class DeviceType>
+    class FixLangevinKokkos : public FixLangevin { // Kokkos variant of FixLangevin, templated on the execution device
+  public:
+    FixLangevinKokkos(class LAMMPS *, int, char **);
+    ~FixLangevinKokkos();
+
+    void cleanup_copy(); // nulls pointer members of a by-value copy; called by the functors below
+    void init();
+    void post_force(int);
+    void reset_dt();
+    void grow_arrays(int);
+    void copy_arrays(int i, int j, int delflag);
+    double compute_scalar();
+    void end_of_step();
+
+    template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
+      int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
+      KOKKOS_INLINE_FUNCTION
+      FSUM post_force_item(int) const; // per-atom kernel invoked by FixLangevinKokkosPostForceFunctor
+
+    KOKKOS_INLINE_FUNCTION
+      void zero_force_item(int) const; // per-atom kernel invoked by FixLangevinKokkosZeroForceFunctor
+
+    KOKKOS_INLINE_FUNCTION
+      double compute_energy_item(int) const; // per-atom kernel invoked by FixLangevinKokkosTallyEnergyFunctor
+
+  private:
+    class CommKokkos *commKK;
+    class AtomKokkos *atomKK;
+    double *rmass; // NOTE(review): raw pointer dereferenced inside post_force_item; confirm it is valid in the device memory space
+    typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev; // k_ = dual view, d_ = device-side view, h_ = host-side view
+    typename ArrayTypes<DeviceType>::t_double_2d d_franprev;
+    HAT::t_double_2d h_franprev;
+
+    typename ArrayTypes<DeviceType>::tdual_double_2d k_flangevin;
+    typename ArrayTypes<DeviceType>::t_double_2d d_flangevin;
+    HAT::t_double_2d h_flangevin;
+
+    typename ArrayTypes<DeviceType>::tdual_double_1d k_tforce;
+    typename ArrayTypes<DeviceType>::t_double_1d d_tforce;
+    HAT::t_double_1d h_tforce;
+
+    typename ArrayTypes<DeviceType>::t_v_array v; // views of the atom arrays, re-bound from atomKK by the methods that use them
+    typename ArrayTypes<DeviceType>::t_f_array f;
+    typename ArrayTypes<DeviceType>::t_int_1d type;
+    typename ArrayTypes<DeviceType>::t_int_1d mask;
+
+    typename ArrayTypes<DeviceType>::tdual_double_1d k_gfactor1, k_gfactor2, k_ratio; // tables indexed by atom type in the kernels
+    typename ArrayTypes<DeviceType>::t_double_1d d_gfactor1, d_gfactor2, d_ratio;
+    HAT::t_double_1d h_gfactor1, h_gfactor2, h_ratio;
+
+    typedef Kokkos::DualView<double[3], DeviceType>
+      tdual_double_1d_3n;
+    tdual_double_1d_3n k_fsumall; // 3-vector filled on the host in post_force and subtracted per atom in zero_force_item
+    typename tdual_double_1d_3n::t_dev d_fsumall;
+    typename tdual_double_1d_3n::t_host h_fsumall;
+
+    double boltz,dt,mvv2e,ftm2v,fran_prop_const;
+
+    void compute_target();
+
+    Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool; // pool from which post_force_item borrows a generator per call
+    typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
+
+  };
+
+  template <class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
+    int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
+    struct FixLangevinKokkosPostForceFunctor { // functor wrapping post_force_item for parallel_for / parallel_reduce
+
+      typedef DeviceType  device_type;
+      typedef FSUM value_type;
+      FixLangevinKokkos<DeviceType> c; // by-value copy of the fix, made from *c_ptr
+
+    FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
+      c(*c_ptr) {}
+      ~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();} // pointers of the copy are nulled at destruction here; the Zero/Tally functors do it in their constructors
+
+      KOKKOS_INLINE_FUNCTION
+      void operator()(const int i) const { // parallel_for form: the FSUM returned by post_force_item is discarded
+        c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
+          Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
+      }
+
+      KOKKOS_INLINE_FUNCTION
+      void operator()(const int i, value_type &fsum) const { // parallel_reduce form: accumulate the per-atom FSUM
+
+        fsum += c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
+          Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
+      }
+
+      KOKKOS_INLINE_FUNCTION
+      static void init(volatile value_type &update) { // reduction identity: all three components zero
+        update.fx = 0.0;
+        update.fy = 0.0;
+        update.fz = 0.0;
+      }
+      KOKKOS_INLINE_FUNCTION
+      static void join(volatile value_type &update,
+                       const volatile value_type &source) { // combine two partial sums component-wise
+        update.fx += source.fx;
+        update.fy += source.fy;
+        update.fz += source.fz;
+      }
+
+    };
+
+  template <class DeviceType>
+    struct FixLangevinKokkosZeroForceFunctor { // functor wrapping zero_force_item for parallel_for
+      typedef DeviceType  device_type ;
+      FixLangevinKokkos<DeviceType> c; // by-value copy of the fix, made from *c_ptr
+
+    FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
+      c(*c_ptr) {c.cleanup_copy();} // null the copy's pointer members right after copying
+
+      KOKKOS_INLINE_FUNCTION
+      void operator()(const int i) const {
+        c.zero_force_item(i);
+      }
+    };
+
+  template<class DeviceType>
+    struct FixLangevinKokkosTallyEnergyFunctor { // functor wrapping compute_energy_item for parallel_reduce
+      typedef DeviceType  device_type ;
+      FixLangevinKokkos<DeviceType> c; // by-value copy of the fix, made from *c_ptr
+      typedef double value_type;
+    FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
+      c(*c_ptr) {c.cleanup_copy();} // null the copy's pointer members right after copying
+
+      KOKKOS_INLINE_FUNCTION
+      void operator()(const int i, value_type &energy) const { // accumulate the per-atom value returned by compute_energy_item
+        energy += c.compute_energy_item(i);
+      }
+      KOKKOS_INLINE_FUNCTION
+      static void init(volatile value_type &update) { // reduction identity
+        update = 0.0;
+      }
+      KOKKOS_INLINE_FUNCTION
+      static void join(volatile value_type &update,
+                       const volatile value_type &source) { // combine two partial sums
+        update += source;
+      }
+    };
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+   E: Illegal ... command
+
+   Self-explanatory.  Check the input script syntax and compare to the
+   documentation for the command.  You can use -echo screen as a
+   command-line option when running LAMMPS to see the offending line.
+
+   E: Fix langevin period must be > 0.0
+
+   The time window for temperature relaxation must be > 0
+
+   E: Fix langevin omega requires atom style sphere
+
+   Self-explanatory.
+
+   E: Fix langevin angmom requires atom style ellipsoid
+
+   Self-explanatory.
+
+   E: Variable name for fix langevin does not exist
+
+   Self-explanatory.
+
+   E: Variable for fix langevin is invalid style
+
+   It must be an equal-style variable.
+
+   E: Fix langevin omega requires extended particles
+
+   One of the particles has radius 0.0.
+
+   E: Fix langevin angmom requires extended particles
+
+   This fix option cannot be used with point particles.
+
+   E: Cannot zero Langevin force of 0 atoms
+
+   The group has zero atoms, so you cannot request its force
+   be zeroed.
+
+   E: Fix langevin variable returned negative temperature
+
+   Self-explanatory.
+
+   E: Could not find fix_modify temperature ID
+
+   The compute ID for computing temperature does not exist.
+
+   E: Fix_modify temperature ID does not compute temperature
+
+   The compute ID assigned to the fix must compute temperature.
+
+   W: Group for fix_modify temp != fix group
+
+   The fix_modify command is specifying a temperature computation that
+   computes a temperature on a different group of atoms than the fix
+   itself operates on.  This is probably not what you want to do.
+
+*/
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
index 3076dca4fa415638ef0041130bdf0238b86cc17c..6f7f1e6300147c551667a05b96c468e144cad204 100644
--- a/src/KOKKOS/fix_nve_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -172,6 +172,6 @@ void FixNVEKokkos<DeviceType>::cleanup_copy()
 }
 
 template class FixNVEKokkos<LMPDeviceType>;
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template class FixNVEKokkos<LMPHostType>;
 #endif
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index b73e25dbc67ffefc806d9b905c8e3f5ae05f2217..5ddd1bac60f1ce5f499a64eb857617ad52c2967e 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -23,8 +23,6 @@
 
 using namespace LAMMPS_NS;
 
-enum{FULL,HALFTHREAD,HALF};
-
 /* ---------------------------------------------------------------------- */
 
 KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
@@ -49,13 +47,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
                strcmp(arg[iarg],"gpus") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
       int ngpu = atoi(arg[iarg+1]);
-      iarg += 2;
 
       int skip_gpu = 9999;
       if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
         skip_gpu = atoi(arg[iarg+2]);
         iarg++;
       }
+      iarg += 2;
 
       char *str;
       if (str = getenv("SLURM_LOCALID")) {
@@ -89,7 +87,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
 
   // initialize Kokkos
 
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
   Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
   Kokkos::Cuda::SelectDevice select_device(device);
   Kokkos::Cuda::initialize(select_device);
@@ -112,7 +110,7 @@ KokkosLMP::~KokkosLMP()
 {
   // finalize Kokkos
 
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
   Kokkos::Cuda::finalize();
   Kokkos::Cuda::host_mirror_device_type::finalize();
 #else
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index d115ed0bb9cca1f3ad193859d0b225ec583b1a01..4d62ec747d967bfc4689c4ad956cfd5a3fcf564a 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -14,71 +14,43 @@
 #ifndef LMP_LMPTYPE_KOKKOS_H
 #define LMP_LMPTYPE_KOKKOS_H
 
-#include <Kokkos_View.hpp>
-#include <Kokkos_Macros.hpp>
-#include <Kokkos_Atomic.hpp>
+#include <Kokkos_Core.hpp>
 #include <Kokkos_DualView.hpp>
 #include <impl/Kokkos_Timer.hpp>
 #include <Kokkos_Vectorization.hpp>
 
 #define MAX_TYPES_STACKPARAMS 12
 #define NeighClusterSize 8
-// set LMPHostype and LMPDeviceType
 
-#ifndef DEVICE
-#define DEVICE 1
+#ifndef __CUDACC__
+  struct double2 {
+    double x, y;
+  };
+  struct float2 {
+    float x, y;
+  };
+  struct double4 {
+    double x, y, z, w;
+  };
+  struct float4 {
+    float x, y, z, w;
+  };
 #endif
 
-#if DEVICE==1
-  #ifdef KOKKOS_HAVE_OPENMP
-    #include "Kokkos_OpenMP.hpp"
-    typedef Kokkos::OpenMP LMPDeviceType;
-    typedef Kokkos::OpenMP LMPHostType;
-  #else
-    #include "Kokkos_Threads.hpp"
-    typedef Kokkos::Threads LMPDeviceType;
-    typedef Kokkos::Threads LMPHostType;
-  #endif
-  #ifndef __CUDACC__
-    struct double2 {
-      double x, y;
-    };
-    struct float2 {
-      float x, y;
-    };
-    struct double4 {
-      double x, y, z, w;
-    };
-    struct float4 {
-      float x, y, z, w;
-    };
-  #endif
-#else
-  #include "cuda.h"
-  #include "cuda_runtime.h"
-  #include "Kokkos_Cuda.hpp"
-  #include "Kokkos_Threads.hpp"
-  typedef Kokkos::Cuda LMPDeviceType;
-  typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
-#endif
+// set LMPHostType and LMPDeviceType from Kokkos Default Types
+typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
+typedef Kokkos::DefaultExecutionSpace::host_mirror_device_type LMPHostType;
 
 // set ExecutionSpace stuct with variable "space"
 
 template<class Device>
 struct ExecutionSpaceFromDevice;
 
-#ifdef KOKKOS_HAVE_OPENMP
-template<>
-struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
-  static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
-};
-#else
 template<>
-struct ExecutionSpaceFromDevice<Kokkos::Threads> {
+struct ExecutionSpaceFromDevice<LMPHostType> {
   static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
 };
-#endif
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template<>
 struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
   static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
@@ -142,16 +114,27 @@ struct s_EV_FLOAT {
   }
 
   KOKKOS_INLINE_FUNCTION
-  s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
-	evdwl += rhs.evdwl;
-	ecoul += rhs.ecoul;
-	v[0] += rhs.v[0];
-	v[1] += rhs.v[1];
-	v[2] += rhs.v[2];
-	v[3] += rhs.v[3];
-	v[4] += rhs.v[4];
-	v[5] += rhs.v[5];
-	return *this;
+  void operator+=(const s_EV_FLOAT &rhs) {
+    evdwl += rhs.evdwl;
+    ecoul += rhs.ecoul;
+    v[0] += rhs.v[0];
+    v[1] += rhs.v[1];
+    v[2] += rhs.v[2];
+    v[3] += rhs.v[3];
+    v[4] += rhs.v[4];
+    v[5] += rhs.v[5];
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator+=(const volatile s_EV_FLOAT &rhs) volatile {
+    evdwl += rhs.evdwl;
+    ecoul += rhs.ecoul;
+    v[0] += rhs.v[0];
+    v[1] += rhs.v[1];
+    v[2] += rhs.v[2];
+    v[3] += rhs.v[3];
+    v[4] += rhs.v[4];
+    v[5] += rhs.v[5];
   }
 };
 typedef struct s_EV_FLOAT EV_FLOAT;
@@ -240,7 +223,7 @@ typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
 typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
 
 typedef Kokkos::
-  DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> 
+  DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
   tdual_tagint_1d;
 typedef tdual_tagint_1d::t_dev t_tagint_1d;
 typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
@@ -249,7 +232,16 @@ typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
 typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
 
 typedef Kokkos::
-  DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> 
+  DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
+  tdual_tagint_2d;
+typedef tdual_tagint_2d::t_dev t_tagint_2d;
+typedef tdual_tagint_2d::t_dev_const t_tagint_2d_const;
+typedef tdual_tagint_2d::t_dev_um t_tagint_2d_um;
+typedef tdual_tagint_2d::t_dev_const_um t_tagint_2d_const_um;
+typedef tdual_tagint_2d::t_dev_const_randomread t_tagint_2d_randomread;
+
+typedef Kokkos::
+  DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
   tdual_imageint_1d;
 typedef tdual_imageint_1d::t_dev t_imageint_1d;
 typedef tdual_imageint_1d::t_dev_const t_imageint_1d_const;
@@ -257,6 +249,22 @@ typedef tdual_imageint_1d::t_dev_um t_imageint_1d_um;
 typedef tdual_imageint_1d::t_dev_const_um t_imageint_1d_const_um;
 typedef tdual_imageint_1d::t_dev_const_randomread t_imageint_1d_randomread;
 
+typedef Kokkos::
+  DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
+typedef tdual_double_1d::t_dev t_double_1d;
+typedef tdual_double_1d::t_dev_const t_double_1d_const;
+typedef tdual_double_1d::t_dev_um t_double_1d_um;
+typedef tdual_double_1d::t_dev_const_um t_double_1d_const_um;
+typedef tdual_double_1d::t_dev_const_randomread t_double_1d_randomread;
+
+typedef Kokkos::
+  DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
+typedef tdual_double_2d::t_dev t_double_2d;
+typedef tdual_double_2d::t_dev_const t_double_2d_const;
+typedef tdual_double_2d::t_dev_um t_double_2d_um;
+typedef tdual_double_2d::t_dev_const_um t_double_2d_const_um;
+typedef tdual_double_2d::t_dev_const_randomread t_double_2d_randomread;
+
 // 1d float array n
 
 typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
@@ -406,7 +414,7 @@ typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
 
 };
 
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template <>
 struct ArrayTypes<LMPHostType> {
 
@@ -446,13 +454,40 @@ typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
 typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
 typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
 
-typedef Kokkos::DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_imageint_1d;
+typedef Kokkos::
+  DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
+  tdual_tagint_2d;
+typedef tdual_tagint_2d::t_host t_tagint_2d;
+typedef tdual_tagint_2d::t_host_const t_tagint_2d_const;
+typedef tdual_tagint_2d::t_host_um t_tagint_2d_um;
+typedef tdual_tagint_2d::t_host_const_um t_tagint_2d_const_um;
+typedef tdual_tagint_2d::t_host_const_randomread t_tagint_2d_randomread;
+
+typedef Kokkos::
+  DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
+  tdual_imageint_1d;
 typedef tdual_imageint_1d::t_host t_imageint_1d;
 typedef tdual_imageint_1d::t_host_const t_imageint_1d_const;
 typedef tdual_imageint_1d::t_host_um t_imageint_1d_um;
 typedef tdual_imageint_1d::t_host_const_um t_imageint_1d_const_um;
 typedef tdual_imageint_1d::t_host_const_randomread t_imageint_1d_randomread;
 
+typedef Kokkos::
+  DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
+typedef tdual_double_1d::t_host t_double_1d;
+typedef tdual_double_1d::t_host_const t_double_1d_const;
+typedef tdual_double_1d::t_host_um t_double_1d_um;
+typedef tdual_double_1d::t_host_const_um t_double_1d_const_um;
+typedef tdual_double_1d::t_host_const_randomread t_double_1d_randomread;
+
+typedef Kokkos::
+  DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
+typedef tdual_double_2d::t_host t_double_2d;
+typedef tdual_double_2d::t_host_const t_double_2d_const;
+typedef tdual_double_2d::t_host_um t_double_2d_um;
+typedef tdual_double_2d::t_host_const_um t_double_2d_const_um;
+typedef tdual_double_2d::t_host_const_randomread t_double_2d_randomread;
+
 //1d float array n
 typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
 typedef tdual_float_1d::t_host t_float_1d;
diff --git a/src/KOKKOS/neigh_full_kokkos.h b/src/KOKKOS/neigh_full_kokkos.h
index 9112e5049a4cbadba411592aba0a606514c19420..effc1cf622b725919eef98cca31d4734d209952f 100644
--- a/src/KOKKOS/neigh_full_kokkos.h
+++ b/src/KOKKOS/neigh_full_kokkos.h
@@ -13,6 +13,7 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
+#include "domain_kokkos.h"
 
 using namespace LAMMPS_NS;
 
@@ -24,7 +25,7 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
   const int nall = includegroup?atom->nfirst:atom->nlocal;
   list->grow(nall);
 
-  NeighborKokkosExecute<DeviceType> 
+  NeighborKokkosExecute<DeviceType>
     data(*list,
          k_cutneighsq.view<DeviceType>(),
          k_bincount.view<DeviceType>(),
@@ -33,14 +34,46 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
          atomKK->k_type.view<DeviceType>(),
          atomKK->k_mask.view<DeviceType>(),
          atomKK->k_molecule.view<DeviceType>(),
+         atomKK->k_tag.view<DeviceType>(),
+         atomKK->k_special.view<DeviceType>(),
+         atomKK->k_nspecial.view<DeviceType>(),
+         atomKK->molecular,
          nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
          bininvx,bininvy,bininvz,
-         bboxhi,bboxlo);
+         exclude, nex_type,maxex_type,
+         k_ex1_type.view<DeviceType>(),
+         k_ex2_type.view<DeviceType>(),
+         k_ex_type.view<DeviceType>(),
+         nex_group,maxex_group,
+         k_ex1_group.view<DeviceType>(),
+         k_ex2_group.view<DeviceType>(),
+         k_ex1_bit.view<DeviceType>(),
+         k_ex2_bit.view<DeviceType>(),
+         nex_mol, maxex_mol,
+         k_ex_mol_group.view<DeviceType>(),
+         k_ex_mol_bit.view<DeviceType>(),
+         bboxhi,bboxlo,
+         domain->xperiodic,domain->yperiodic,domain->zperiodic,
+         domain->xprd_half,domain->yprd_half,domain->zprd_half);
 
   k_cutneighsq.sync<DeviceType>();
-  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
+  k_ex1_type.sync<DeviceType>();
+  k_ex2_type.sync<DeviceType>();
+  k_ex_type.sync<DeviceType>();
+  k_ex1_group.sync<DeviceType>();
+  k_ex2_group.sync<DeviceType>();
+  k_ex1_bit.sync<DeviceType>();
+  k_ex2_bit.sync<DeviceType>();
+  k_ex_mol_group.sync<DeviceType>();
+  k_ex_mol_bit.sync<DeviceType>();
+  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
   Kokkos::deep_copy(list->d_stencil,list->h_stencil);
 
+  data.special_flag[0] = special_flag[0];
+  data.special_flag[1] = special_flag[1];
+  data.special_flag[2] = special_flag[2];
+  data.special_flag[3] = special_flag[3];
+
   while(data.h_resize() > 0) {
     data.h_resize() = 0;
     deep_copy(data.resize, data.h_resize);
@@ -78,24 +111,24 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
 
   Kokkos::deep_copy(data.resize, data.h_resize);
   Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
     #define BINS_PER_BLOCK 2
     const int factor = atoms_per_bin<64?2:1;
-    Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
+    Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
 #else
     const int factor = 1;
 #endif
 
 if(newton_pair) {
   NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
   Kokkos::parallel_for(config, f);
 #else
   Kokkos::parallel_for(nall, f);
 #endif
 } else {
   NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
   Kokkos::parallel_for(config, f);
 #else
   Kokkos::parallel_for(nall, f);
@@ -134,6 +167,62 @@ void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
   }
 }
 
+/* ---------------------------------------------------------------------- */
+template<class Device>
+KOKKOS_INLINE_FUNCTION
+int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
+{
+  const int n1 = nspecial(i,0);  // entries [0,n1) of special(i,:) are governed by special_flag[1]
+  const int n2 = nspecial(i,1);  // entries [n1,n2) are governed by special_flag[2]
+  const int n3 = nspecial(i,2);  // entries [n2,n3) are governed by special_flag[3]
+  // Returns -1 / 0 / tier (1,2,3) when special_flag[tier] is 0 / 1 / other; 0 if tag(j) is not listed.
+  for (int k = 0; k < n3; k++) {  // linear search for tag(j) in atom i's special list
+    if (special(i,k) == tag(j)) {
+      if (k < n1) {
+        if (special_flag[1] == 0) return -1;
+        else if (special_flag[1] == 1) return 0;
+        else return 1;
+      } else if (k < n2) {
+        if (special_flag[2] == 0) return -1;
+        else if (special_flag[2] == 1) return 0;
+        else return 2;
+      } else {
+        if (special_flag[3] == 0) return -1;
+        else if (special_flag[3] == 1) return 0;
+        else return 3;
+      }
+    }
+  }
+  return 0;  // tag(j) does not appear in atom i's special list
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class Device>
+KOKKOS_INLINE_FUNCTION
+int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
+                                             const int &itype,const int &jtype) const
+{
+  // Returns 1 if pair (i,j) matches a type, group, or molecule exclusion, else 0.
+
+  // type rule: a nonzero ex_type(itype,jtype) entry excludes the pair
+  if (nex_type && ex_type(itype,jtype)) return 1;
+
+  // group rules: test each ex1/ex2 bit pair against (i,j) and then (j,i)
+  for (int g = 0; g < nex_group; g++) {
+    if ((mask(i) & ex1_bit(g)) && (mask(j) & ex2_bit(g))) return 1;
+    if ((mask(i) & ex2_bit(g)) && (mask(j) & ex1_bit(g))) return 1;
+  }
+
+  // molecule rules: both atoms carry the group bit and share a molecule id
+  for (int g = 0; g < nex_mol; g++) {
+    const int bit = ex_mol_bit(g);
+    if ((mask(i) & bit) && (mask(j) & bit) && molecule(i) == molecule(j)) return 1;
+  }
+
+  return 0;
+}
+
 /* ---------------------------------------------------------------------- */
 
 template<class Device> template<int HalfNeigh,int GhostNewton>
@@ -142,7 +231,10 @@ void NeighborKokkosExecute<Device>::
 {
   /* if necessary, goto next page and add pages */
   int n = 0;
-
+  int which = 0;
+  int moltemplate;
+  if (molecular == 2) moltemplate = 1;
+  else moltemplate = 0;
   // get subview of neighbors of i
 
   const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
@@ -161,52 +253,81 @@ void NeighborKokkosExecute<Device>::
   if(HalfNeigh)
   for(int m = 0; m < c_bincount(ibin); m++) {
     const int j = c_bins(ibin,m);
-  // printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
     const int jtype = type(j);
+
     //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
     if((j == i) || (HalfNeigh && !GhostNewton && (j < i))  ||
         (HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
                                        ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
                                         (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
       ) continue;
-    //if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
+    if(exclude && exclusion(i,j,itype,jtype)) continue;
 
     const X_FLOAT delx = xtmp - x(j, 0);
     const X_FLOAT dely = ytmp - x(j, 1);
     const X_FLOAT delz = ztmp - x(j, 2);
     const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
     if(rsq <= cutneighsq(itype,jtype)) {
-      if(n<neigh_list.maxneighs) neighbors_i(n) = j;
-      n++;
+      if (molecular) {
+        if (!moltemplate)
+          which = find_special(i,j);
+            /* else if (imol >= 0) */
+            /*   which = find_special(onemols[imol]->special[iatom], */
+            /*                        onemols[imol]->nspecial[iatom], */
+            /*                        tag[j]-tagprev); */
+            /* else which = 0; */
+        if (which == 0){
+          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        }else if (minimum_image_check(delx,dely,delz)){
+          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        }
+        else if (which > 0) {
+          if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+        }
+      } else {
+        if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+      }
     }
   }
 
   for(int k = 0; k < nstencil; k++) {
     const int jbin = ibin + stencil[k];
     // get subview of jbin
-    if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
+    if(HalfNeigh&&(ibin==jbin)) continue;
     //const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
       for(int m = 0; m < c_bincount(jbin); m++) {
         const int j = c_bins(jbin,m);
-        //if(i==0)
-        //printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
         const int jtype = type(j);
 
         if(HalfNeigh && !GhostNewton && (j < i)) continue;
         if(!HalfNeigh && j==i) continue;
-        //if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+        if(exclude && exclusion(i,j,itype,jtype)) continue;
 
         const X_FLOAT delx = xtmp - x(j, 0);
         const X_FLOAT dely = ytmp - x(j, 1);
         const X_FLOAT delz = ztmp - x(j, 2);
         const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
-      //if(i==0)
-        //printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
 
         if(rsq <= cutneighsq(itype,jtype)) {
-          if(n<neigh_list.maxneighs) neighbors_i(n) = j;
-          n++;
+          if (molecular) {
+            if (!moltemplate)
+              which = find_special(i,j);
+            /* else if (imol >= 0) */
+            /*   which = find_special(onemols[imol]->special[iatom], */
+            /*                        onemols[imol]->nspecial[iatom], */
+            /*                        tag[j]-tagprev); */
+            /* else which = 0; */
+            if (which == 0){
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            }else if (minimum_image_check(delx,dely,delz)){
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            }
+            else if (which > 0) {
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+            }
+          } else {
+            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          }
         }
 
       }
@@ -222,23 +343,23 @@ void NeighborKokkosExecute<Device>::
   neigh_list.d_ilist(i) = i;
 }
 
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 extern __shared__ X_FLOAT sharedmem[];
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType> template<int HalfNeigh>
+template<class DeviceType> template<int HalfNeigh,int GhostNewton>
 __device__ inline
-void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
+void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
 {
   /* loop over atoms in i's bin,
   */
   const int atoms_per_bin = c_bins.dimension_1();
-  const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
-  const int MY_BIN = threadIdx.x/atoms_per_bin;
-  const int MY_II = threadIdx.x%atoms_per_bin;
+  const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
+  const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
+  const int MY_BIN = dev.team_rank()/atoms_per_bin;
 
-  const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
+  const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
 
   if(ibin >=c_bincount.dimension_0()) return;
   X_FLOAT* other_x = sharedmem;
@@ -248,6 +369,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
 
   int bincount_current = c_bincount[ibin];
 
+  for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
+    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
   const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
   /* if necessary, goto next page and add pages */
 
@@ -278,17 +401,45 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
     #pragma unroll 4
     for(int m = 0; m < bincount_current; m++) {
       int j = other_id[m];
+      const int jtype = other_x[m + 3 * atoms_per_bin];
 
       //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
-      //if(j==i) continue;
-      if((j == i) || (HalfNeigh && (j < i)))  continue;
-
+      if((j == i) ||
+         (HalfNeigh && !GhostNewton && (j < i))  ||
+         (HalfNeigh && GhostNewton &&
+            ((j < i) ||
+            ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
+              (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
+        ) continue;
+      if(exclude && exclusion(i,j,itype,jtype)) continue;
       const X_FLOAT delx = xtmp - other_x[m];
       const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
       const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
-      const int jtype = other_x[m + 3 * atoms_per_bin];
       const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
-      if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
+
+      if(rsq <= cutneighsq(itype,jtype)) {
+        if (molecular) {
+          int which = 0;
+          if (!moltemplate)
+            which = find_special(i,j);
+          /* else if (imol >= 0) */
+          /*   which = find_special(onemols[imol]->special[iatom], */
+          /*                        onemols[imol]->nspecial[iatom], */
+          /*                        tag[j]-tagprev); */
+          /* else which = 0; */
+          if (which == 0){
+            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          }else if (minimum_image_check(delx,dely,delz)){
+            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          }
+          else if (which > 0) {
+            if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+          }
+        } else {
+          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        }
+      }
+
     }
   }
   __syncthreads();
@@ -319,15 +470,41 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
       #pragma unroll 8
       for(int m = 0; m < bincount_current; m++) {
         const int j = other_id[m];
+        const int jtype = other_x[m + 3 * atoms_per_bin];
 
-        if(HalfNeigh && (j < i))  continue;
+        //if(HalfNeigh && (j < i))  continue;
+        if(HalfNeigh && !GhostNewton && (j < i)) continue;
+        if(!HalfNeigh && j==i) continue;
+        if(exclude && exclusion(i,j,itype,jtype)) continue;
 
         const X_FLOAT delx = xtmp - other_x[m];
         const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
         const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
-        const int jtype = other_x[m + 3 * atoms_per_bin];
         const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
-        if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
+
+        if(rsq <= cutneighsq(itype,jtype)) {
+          if (molecular) {
+            int which = 0;
+            if (!moltemplate)
+              which = find_special(i,j);
+            /* else if (imol >= 0) */
+            /*   which = find_special(onemols[imol]->special[iatom], */
+            /*                        onemols[imol]->nspecial[iatom], */
+            /*                        tag[j]-tagprev); */
+            /* else which = 0; */
+            if (which == 0){
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            }else if (minimum_image_check(delx,dely,delz)){
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            }
+            else if (which > 0) {
+              if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+            }
+          } else {
+            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          }
+        }
+
       }
     }
     __syncthreads();
@@ -343,6 +520,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
 
     if(n >= new_maxneighs()) new_maxneighs() = n;
   }
+  }
 }
 #endif
 
@@ -361,12 +539,45 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
          atomKK->k_type.view<DeviceType>(),
          atomKK->k_mask.view<DeviceType>(),
          atomKK->k_molecule.view<DeviceType>(),
+         atomKK->k_tag.view<DeviceType>(),
+         atomKK->k_special.view<DeviceType>(),
+         atomKK->k_nspecial.view<DeviceType>(),
+         atomKK->molecular,
          nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
          bininvx,bininvy,bininvz,
-         bboxhi,bboxlo);
+         exclude, nex_type,maxex_type,
+         k_ex1_type.view<DeviceType>(),
+         k_ex2_type.view<DeviceType>(),
+         k_ex_type.view<DeviceType>(),
+         nex_group,maxex_group,
+         k_ex1_group.view<DeviceType>(),
+         k_ex2_group.view<DeviceType>(),
+         k_ex1_bit.view<DeviceType>(),
+         k_ex2_bit.view<DeviceType>(),
+         nex_mol, maxex_mol,
+         k_ex_mol_group.view<DeviceType>(),
+         k_ex_mol_bit.view<DeviceType>(),
+         bboxhi,bboxlo,
+         domain->xperiodic,domain->yperiodic,domain->zperiodic,
+         domain->xprd_half,domain->yprd_half,domain->zprd_half);
 
   k_cutneighsq.sync<DeviceType>();
-  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
+  k_ex1_type.sync<DeviceType>();
+  k_ex2_type.sync<DeviceType>();
+  k_ex_type.sync<DeviceType>();
+  k_ex1_group.sync<DeviceType>();
+  k_ex2_group.sync<DeviceType>();
+  k_ex1_bit.sync<DeviceType>();
+  k_ex2_bit.sync<DeviceType>();
+  k_ex_mol_group.sync<DeviceType>();
+  k_ex_mol_bit.sync<DeviceType>();
+
+  data.special_flag[0] = special_flag[0];
+  data.special_flag[1] = special_flag[1];
+  data.special_flag[2] = special_flag[2];
+  data.special_flag[3] = special_flag[3];
+
+  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
   Kokkos::deep_copy(list->d_stencil,list->h_stencil);
   DeviceType::fence();
 
@@ -407,24 +618,24 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
 
   Kokkos::deep_copy(data.resize, data.h_resize);
   Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
     #define BINS_PER_BLOCK 2
     const int factor = atoms_per_bin<64?2:1;
-    Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
+    Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
 #else
     const int factor = 1;
 #endif
 
 if(newton_pair) {
   NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
-//#if DEVICE==2
+//#ifdef KOKKOS_HAVE_CUDA
 //  Kokkos::parallel_for(config, f);
 //#else
   Kokkos::parallel_for(nall, f);
 //#endif
 } else {
   NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
-//#if DEVICE==2
+//#ifdef KOKKOS_HAVE_CUDA
 //  Kokkos::parallel_for(config, f);
 //#else
   Kokkos::parallel_for(nall, f);
diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp
index dbb0aa57279e82644c87a15e41760c0279115496..2730c15a2ba6f1bc1919284cd5a01cc1358b0ffb 100644
--- a/src/KOKKOS/neigh_list_kokkos.cpp
+++ b/src/KOKKOS/neigh_list_kokkos.cpp
@@ -113,6 +113,6 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
 }
 
 template class NeighListKokkos<LMPDeviceType>;
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template class NeighListKokkos<LMPHostType>;
 #endif
diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h
index fd4ac3acc9f40447dc16d17a9b33d171dce8975a..fa0eaba76a27f8e05c32e99ea0a9c1e6c3d96554 100644
--- a/src/KOKKOS/neigh_list_kokkos.h
+++ b/src/KOKKOS/neigh_list_kokkos.h
@@ -20,7 +20,7 @@
 
 namespace LAMMPS_NS {
 
-enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
+enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
 
 class AtomNeighbors
 {
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index adea8239764c1b42a9a5453b728c346ea136189c..133ac7cea01ed066f2fa558b8848dd9cced72f20 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -51,6 +51,16 @@ NeighborKokkos::~NeighborKokkos()
 
   delete [] pair_build_device;
   delete [] pair_build_host;
+
+  memory->destroy_kokkos(k_ex1_type,ex1_type);
+  memory->destroy_kokkos(k_ex2_type,ex2_type);
+  memory->destroy_kokkos(k_ex1_group,ex1_group);
+  memory->destroy_kokkos(k_ex2_group,ex2_group);
+  memory->destroy_kokkos(k_ex_mol_group,ex_mol_group);
+  memory->destroy_kokkos(k_ex1_bit,ex1_bit);
+  memory->destroy_kokkos(k_ex2_bit,ex2_bit);
+  memory->destroy_kokkos(k_ex_mol_bit,ex_mol_bit);
+
 }
 
 /* ---------------------------------------------------------------------- */
@@ -72,7 +82,7 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
 /* ---------------------------------------------------------------------- */
 
 int NeighborKokkos::init_lists_kokkos()
-{ 
+{
   int i;
 
   for (i = 0; i < nlist_host; i++) delete lists_host[i];
@@ -211,6 +221,32 @@ void NeighborKokkos::init_list_grow_kokkos(int i)
 
 /* ---------------------------------------------------------------------- */
 
+void NeighborKokkos::init_ex_type_kokkos(int n)
+{
+  memory->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type");  // allocate k_ex_type/ex_type with extents (n+1) x (n+1)
+  k_ex_type.modify<LMPHostType>();  // flag the host side as modified; full_bin_kokkos() later calls k_ex_type.sync<DeviceType>()
+}
+
+/* ---------------------------------------------------------------------- */
+
+void NeighborKokkos::init_ex_bit_kokkos()
+{
+  // allocate both group-bit arrays (length nex_group) first ...
+  memory->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit");
+  memory->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit");
+  k_ex1_bit.modify<LMPHostType>(); k_ex2_bit.modify<LMPHostType>();  // ... then flag both host sides as modified
+}
+
+/* ---------------------------------------------------------------------- */
+
+void NeighborKokkos::init_ex_mol_bit_kokkos()
+{
+  memory->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit");  // allocate k_ex_mol_bit/ex_mol_bit with nex_mol entries
+  k_ex_mol_bit.modify<LMPHostType>();  // flag the host side as modified; the build routines later call k_ex_mol_bit.sync<DeviceType>()
+}
+
+/* ---------------------------------------------------------------------- */
+
 void NeighborKokkos::choose_build(int index, NeighRequest *rq)
 {
   if (rq->kokkos_host != 0) {
@@ -264,6 +300,29 @@ void NeighborKokkos::setup_bins_kokkos(int i)
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
+void NeighborKokkos::modify_ex_type_grow_kokkos(){
+  // grow both type-exclusion arrays to maxex_type entries, then flag their host sides as modified
+  memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
+  memory->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
+  k_ex1_type.modify<LMPHostType>(); k_ex2_type.modify<LMPHostType>();
+}
+
+/* ---------------------------------------------------------------------- */
+void NeighborKokkos::modify_ex_group_grow_kokkos(){
+  // grow both group-exclusion arrays to maxex_group entries, then flag their host sides as modified
+  memory->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
+  memory->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
+  k_ex1_group.modify<LMPHostType>(); k_ex2_group.modify<LMPHostType>();
+}
+
+/* ---------------------------------------------------------------------- */
+void NeighborKokkos::modify_mol_group_grow_kokkos(){
+  memory->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");  // grow k_ex_mol_group/ex_mol_group to maxex_mol entries
+  k_ex_mol_group.modify<LMPHostType>();  // flag the host side as modified; the build routines later call k_ex_mol_group.sync<DeviceType>()
+}
+
 // include to trigger instantiation of templated functions
 
 #include "neigh_full_kokkos.h"
diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h
index 9abbd007380e7f5dad4ba3bf77b023f97437a8b4..3736f1405b1f29dfdf8cafd3e2fcd48652d522d8 100644
--- a/src/KOKKOS/neighbor_kokkos.h
+++ b/src/KOKKOS/neighbor_kokkos.h
@@ -17,6 +17,7 @@
 #include "neighbor.h"
 #include "neigh_list_kokkos.h"
 #include "kokkos_type.h"
+#include <math.h>
 
 namespace LAMMPS_NS {
 
@@ -33,8 +34,15 @@ class NeighborKokkosExecute
   typename AT::t_int_2d bins;
   typename AT::t_int_2d_const c_bins;
   const typename AT::t_x_array_randomread x;
-  const typename AT::t_int_1d_const type,mask;
-  const typename AT::t_tagint_1d_const molecule;
+  const typename AT::t_int_1d_const type,mask,molecule;
+
+  const typename AT::t_tagint_1d_const tag;
+  const typename AT::t_tagint_2d_const special;
+  const typename AT::t_int_2d_const nspecial;
+  const int molecular;
+  int moltemplate;
+
+  int special_flag[4];
 
   const int nbinx,nbiny,nbinz;
   const int mbinx,mbiny,mbinz;
@@ -44,38 +52,88 @@ class NeighborKokkosExecute
 
   const int nlocal;
 
+  const int exclude;
+
+  const int nex_type;
+  const int maxex_type;
+  const typename AT::t_int_1d_const ex1_type,ex2_type;
+  const typename AT::t_int_2d_const ex_type;
+
+  const int nex_group;
+  const int maxex_group;
+  const typename AT::t_int_1d_const ex1_group,ex2_group;
+  const typename AT::t_int_1d_const ex1_bit,ex2_bit;
+
+  const int nex_mol;
+  const int maxex_mol;
+  const typename AT::t_int_1d_const ex_mol_group;
+  const typename AT::t_int_1d_const ex_mol_bit;
+
   typename AT::t_int_scalar resize;
   typename AT::t_int_scalar new_maxneighs;
   typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
   typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
 
+  const int xperiodic, yperiodic, zperiodic;
+  const X_FLOAT xprd_half, yprd_half, zprd_half; // compared with fabs() of coordinate deltas in minimum_image_check(); int would truncate the bound
+
   NeighborKokkosExecute(
-    const NeighListKokkos<Device> &_neigh_list,
-    const typename AT::t_xfloat_2d_randomread &_cutneighsq,
-    const typename AT::t_int_1d &_bincount,
-    const typename AT::t_int_2d &_bins,
-    const int _nlocal,
-        const typename AT::t_x_array_randomread &_x,
-    const typename AT::t_int_1d_const &_type,
-    const typename AT::t_int_1d_const &_mask,
-    const typename AT::t_tagint_1d_const &_molecule,
-    const int & _nbinx,const int & _nbiny,const int & _nbinz,
-    const int & _mbinx,const int & _mbiny,const int & _mbinz,
-    const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
-    const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
-    const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo):
+                        const NeighListKokkos<Device> &_neigh_list,
+                        const typename AT::t_xfloat_2d_randomread &_cutneighsq,
+                        const typename AT::t_int_1d &_bincount,
+                        const typename AT::t_int_2d &_bins,
+                        const int _nlocal,
+                        const typename AT::t_x_array_randomread &_x,
+                        const typename AT::t_int_1d_const &_type,
+                        const typename AT::t_int_1d_const &_mask,
+                        const typename AT::t_int_1d_const &_molecule,
+                        const typename AT::t_tagint_1d_const &_tag,
+                        const typename AT::t_tagint_2d_const &_special,
+                        const typename AT::t_int_2d_const &_nspecial,
+                        const int &_molecular,
+                        const int & _nbinx,const int & _nbiny,const int & _nbinz,
+                        const int & _mbinx,const int & _mbiny,const int & _mbinz,
+                        const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
+                        const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
+                        const int & _exclude,const int & _nex_type,const int & _maxex_type,
+                        const typename AT::t_int_1d_const & _ex1_type,
+                        const typename AT::t_int_1d_const & _ex2_type,
+                        const typename AT::t_int_2d_const & _ex_type,
+                        const int & _nex_group,const int & _maxex_group,
+                        const typename AT::t_int_1d_const & _ex1_group,
+                        const typename AT::t_int_1d_const & _ex2_group,
+                        const typename AT::t_int_1d_const & _ex1_bit,
+                        const typename AT::t_int_1d_const & _ex2_bit,
+                        const int & _nex_mol,const int & _maxex_mol,
+                        const typename AT::t_int_1d_const & _ex_mol_group,
+                        const typename AT::t_int_1d_const & _ex_mol_bit,
+                        const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
+                        const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
+                        const X_FLOAT & _xprd_half, const X_FLOAT & _yprd_half, const X_FLOAT & _zprd_half):
     neigh_list(_neigh_list), cutneighsq(_cutneighsq),
     bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
     nlocal(_nlocal),
     x(_x),type(_type),mask(_mask),molecule(_molecule),
+    tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
     nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
     mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
     mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
-    bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) {
+    bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
+    exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
+    ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
+    nex_group(_nex_group),maxex_group(_maxex_group),
+    ex1_group(_ex1_group),ex2_group(_ex2_group),
+    ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
+    ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
+    xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
+    xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
+
+    if (molecular == 2) moltemplate = 1;
+    else moltemplate = 0;
 
     bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
     bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
-    
+
     resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
 #ifndef KOKKOS_USE_UVM
     h_resize = Kokkos::create_mirror_view(resize);
@@ -103,10 +161,10 @@ class NeighborKokkosExecute
   KOKKOS_FUNCTION
   void build_cluster_Item(const int &i) const;
 
-#if DEVICE==2
-  template<int HalfNeigh>
+#ifdef KOKKOS_HAVE_CUDA
+  template<int HalfNeigh, int GhostNewton>
   __device__ inline
-  void build_ItemCuda(Device dev) const;
+  void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
 #endif
 
   KOKKOS_INLINE_FUNCTION
@@ -143,6 +201,21 @@ class NeighborKokkosExecute
 
     return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
   }
+
+  KOKKOS_INLINE_FUNCTION
+  int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
+
+  KOKKOS_INLINE_FUNCTION
+  int find_special(const int &i, const int &j) const;
+
+  KOKKOS_INLINE_FUNCTION
+  int minimum_image_check(double dx, double dy, double dz) const {
+    // 1 if, in any periodic dimension, |component| exceeds the matching *prd_half bound; else 0
+    return ((xperiodic && fabs(dx) > xprd_half) ||
+            (yperiodic && fabs(dy) > yprd_half) ||
+            (zperiodic && fabs(dz) > zprd_half)) ? 1 : 0;
+  }
+
 };
 
 template<class Device>
@@ -175,12 +248,12 @@ struct NeighborKokkosBuildFunctor {
   void operator() (const int & i) const {
     c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
   }
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
   KOKKOS_INLINE_FUNCTION
-  void operator() (Device dev) const {
-    c.template build_ItemCuda<HALF_NEIGH>(dev);
+  void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
+    c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
   }
-  size_t shmem_size() const { return sharedsize; }
+  size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
 #endif
 };
 
@@ -220,15 +293,28 @@ class NeighborKokkos : public Neighbor {
   DAT::tdual_int_1d k_bincount;
   DAT::tdual_int_2d k_bins;
 
+  DAT::tdual_int_1d k_ex1_type,k_ex2_type;
+  DAT::tdual_int_2d k_ex_type;
+  DAT::tdual_int_1d k_ex1_group,k_ex2_group;
+  DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
+  DAT::tdual_int_1d k_ex_mol_group;
+  DAT::tdual_int_1d k_ex_mol_bit;
+
   void init_cutneighsq_kokkos(int);
   int init_lists_kokkos();
   void init_list_flags1_kokkos(int);
   void init_list_flags2_kokkos(int);
   void init_list_grow_kokkos(int);
+  void init_ex_type_kokkos(int);
+  void init_ex_bit_kokkos();
+  void init_ex_mol_bit_kokkos();
   void choose_build(int, NeighRequest *);
   void build_kokkos(int);
   void setup_bins_kokkos(int);
-  
+  void modify_ex_type_grow_kokkos();
+  void modify_ex_group_grow_kokkos();
+  void modify_mol_group_grow_kokkos();
+
   typedef void (NeighborKokkos::*PairPtrHost)
     (class NeighListKokkos<LMPHostType> *);
   PairPtrHost *pair_build_host;
diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..494df9267fc8c9f155582c134633ba0ec01d0a1f
--- /dev/null
+++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp
@@ -0,0 +1,266 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_coul_cut_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairCoulCutKokkos<DeviceType>::PairCoulCutKokkos(LAMMPS *lmp) : PairCoulCut(lmp)
+{
+  atomKK = (AtomKokkos *) atom;  // Kokkos-aware handle on the atom class; compute() uses it for sync()/modified()
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;  // space tag passed to sync()/modified()
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;  // atom data synced before the kernel
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;  // atom data marked modified when energy/virial is requested
+  cutsq = NULL;  // created later, in allocate(), together with k_cutsq
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairCoulCutKokkos<DeviceType>::~PairCoulCutKokkos()
+{
+  if (allocated)  // cleanup_copy() zeroes this flag, so copies that called it skip the destroy
+    memory->destroy_kokkos(k_cutsq, cutsq);  // counterpart of the create_kokkos call in allocate()
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairCoulCutKokkos<DeviceType>::cleanup_copy() {
+  // Why needed: clearing these members on a copy of the pair style keeps the copy's destructors from deallocating any arrays
+  allocated = 0;
+  cutsq = NULL;
+  eatom = NULL;
+  vatom = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+  // Runs the generic pair_compute kernel for this style and folds its reduced energy/virial into the pair totals.
+
+  if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;  // NOTE(review): presumably disables the f.r virial for full lists - confirm
+
+  double ecoul = 0.0;  // NOTE(review): unused local; the energy is taken from ev.ecoul below
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  atomKK->sync(execution_space,datamask_read);  // sync the atom data this style reads to execution_space
+  k_cutsq.template sync<DeviceType>();  // sync the cutoff and parameter tables filled on the host in init_one()
+  k_cut_ljsq.template sync<DeviceType>();
+  k_cut_coulsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);  // forces plus energy/virial data
+  else atomKK->modified(execution_space,F_MASK);  // forces only
+
+  x = atomKK->k_x.view<DeviceType>();  // x and c_x are both views of k_x (their declared view types differ)
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+  special_lj[0] = force->special_lj[0];  // member copies of the scalars the kernel reads through its copy of this object
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+  special_coul[0] = force->special_coul[0];
+  special_coul[1] = force->special_coul[1];
+  special_coul[2] = force->special_coul[2];
+  special_coul[3] = force->special_coul[3];
+  qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void >
+    (this,(NeighListKokkos<DeviceType>*)list);
+
+  DeviceType::fence();  // NOTE(review): presumably waits for the kernel to finish before ev is used - confirm
+
+  if (eflag) eng_coul += ev.ecoul;  // only the coulomb energy is accumulated by this style
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairCoulCutKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+              const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  // scale for this type pair: member-array copy when STACKPARAMS, params view otherwise
+  const F_FLOAT scale_ij =
+    STACKPARAMS ? m_params[itype][jtype].scale : params(itype,jtype).scale;
+  const F_FLOAT inv_rsq = 1.0/rsq;
+  const F_FLOAT inv_r = sqrt(inv_rsq);
+  // qqrd2e*scale*qtmp*q(j)/r, then weighted by factor_coul and divided by r^2
+  const F_FLOAT forcecoul = qqrd2e*scale_ij*qtmp*q(j)*inv_r;
+  return factor_coul*forcecoul*inv_rsq;
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairCoulCutKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+              const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  const F_FLOAT scale_ij = STACKPARAMS ? m_params[itype][jtype].scale : params(itype,jtype).scale;
+  const F_FLOAT inv_rsq = 1.0/rsq;
+  const F_FLOAT inv_r = sqrt(inv_rsq);
+  // energy term: factor_coul*qqrd2e*scale*qtmp*q(j)/r
+  return factor_coul*qqrd2e*scale_ij*qtmp*q(j)*inv_r;
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairCoulCutKokkos<DeviceType>::allocate()
+{
+  PairCoulCut::allocate();
+  // replace the parent's cutsq array with one created together with the k_cutsq dual view
+  int n = atom->ntypes;
+  memory->destroy(cutsq);
+  memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  // additional (n+1) x (n+1) cutoff tables; the d_* members are their DeviceType views
+  k_cut_ljsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_ljsq",n+1,n+1);
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+  k_cut_coulsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_coulsq",n+1,n+1);
+  d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
+  // per-type-pair parameter table; compute_fcoul/compute_ecoul read it through params
+  k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulCut::params",n+1,n+1);
+  params = k_params.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  // Accept 1 or 2 args. The parent is always told there is exactly one, so narg == 0 must be rejected here.
+  if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
+  // only arg[0] is forwarded; a second argument, if given, is not passed on
+  PairCoulCut::settings(1,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairCoulCutKokkos<DeviceType>::init_style()
+{
+  PairCoulCut::init_style();
+  // Adjust the neighbor request for Kokkos: which space builds the list, and half vs. full.
+  // irequest = neigh request made by parent class (taken to be the last entry in neighbor->requests)
+
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;
+  // kokkos_host is set only when DeviceType is the host type and not also the device type
+  neighbor->requests[irequest]->
+    kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
+    !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  neighbor->requests[irequest]->
+    kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  // only FULL, HALF and HALFTHREAD are accepted; any other neighflag is a fatal error
+  if (neighflag == FULL) {
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else if (neighflag == HALF || neighflag == HALFTHREAD) {
+    neighbor->requests[irequest]->full = 0;
+    neighbor->requests[irequest]->half = 1;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else {
+    error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairCoulCutKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairCoulCut::init_one(i,j);
+  // Every table is filled at both (i,j) and (j,i); the kernels index them as (itype,jtype) in either order.
+  k_params.h_view(i,j).scale = scale[i][j];
+  k_params.h_view(i,j).cutsq = cutone*cutone;
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  // fixed-size member copies (the STACKPARAMS path), stored only when both types fit in the arrays
+  if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cutone*cutone;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cutone*cutone;
+  }
+  k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j) = cutone*cutone;
+  k_cutsq.template modify<LMPHostType>();
+  k_cut_ljsq.h_view(j,i) = k_cut_ljsq.h_view(i,j) = cutone*cutone;
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_cut_coulsq.h_view(j,i) = k_cut_coulsq.h_view(i,j) = cutone*cutone;
+  k_cut_coulsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+
+
+template class PairCoulCutKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#ifdef KOKKOS_HAVE_CUDA
+template class PairCoulCutKokkos<LMPHostType>;  // NOTE(review): presumably a distinct type only in CUDA builds - confirm
+#endif
diff --git a/src/KOKKOS/pair_coul_cut_kokkos.h b/src/KOKKOS/pair_coul_cut_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..96e4ca0f6313cf4d340a1589861f7f66a643589f
--- /dev/null
+++ b/src/KOKKOS/pair_coul_cut_kokkos.h
@@ -0,0 +1,137 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/cut/kk,PairCoulCutKokkos<LMPDeviceType>)
+PairStyle(coul/cut/kk/device,PairCoulCutKokkos<LMPDeviceType>)
+PairStyle(coul/cut/kk/host,PairCoulCutKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_COUL_CUT_KOKKOS_H
+#define LMP_PAIR_COUL_CUT_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_coul_cut.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairCoulCutKokkos : public PairCoulCut {  // Kokkos variant of PairCoulCut, templated on the device type
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};  // mask of supported neighbor-list styles (tested with NEIGHFLAG&mask)
+  enum {COUL_FLAG=1};  // DoCoul<1> selects the CoulTag overload of compute_item
+  typedef DeviceType device_type;
+  PairCoulCutKokkos(class LAMMPS *);
+  ~PairCoulCutKokkos();
+
+  void compute(int, int);
+
+  void settings(int, char **);
+  void init_style();
+  double init_one(int, int);
+  // per-type-pair parameters: squared cutoff and coulomb scale factor
+  struct params_coul{
+    params_coul(){cutsq=0,scale=0;};
+    params_coul(int i){cutsq=0,scale=0;};
+    F_FLOAT cutsq, scale;
+  };
+
+ protected:
+  void cleanup_copy();
+  // this style has no non-coulomb pair force: always 0
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const {
+    return 0.0;
+  }
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+  // this style has no van der Waals energy: always 0
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+    F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                          const int& itype, const int& jtype) const{
+    return 0;
+  }
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const params;
+  // fixed-size member copies of the per-type-pair tables, bounded by MAX_TYPES_STACKPARAMS
+  params_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
+  typename ArrayTypes<DeviceType>::t_x_array c_x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+  int newton_pair;
+  // dual views (k_*) of the cutoff tables and their DeviceType views (d_*), set up in allocate()
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
+
+  class AtomKokkos *atomKK;
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+  // copies of the force-> scalars, refreshed at the start of compute()
+  double special_coul[4];
+  double special_lj[4];
+  double qqrd2e;
+
+  void allocate();
+  friend class PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,N2,true>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,true >;
+  friend class PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,N2,false>;
+  friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,false >;
+  friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
+                                                       NeighListKokkos<DeviceType>*);
+  friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h
index de67e7df0bd9d6a5df8b908cd12fee25835a66ea..2204399a39525cbfa97410c876cda74fbbc9c6f0 100644
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@@ -25,27 +25,72 @@
 
 namespace LAMMPS_NS {
 
+template<int Table>
+struct CoulLongTable {  // tag type that carries an integer template argument
+  enum {DoTable = Table};  // the argument, readable as CoulLongTable<N>::DoTable
+};
+
+// Tag types that select whether coulomb terms are computed.
+// They allow the choice to be made by function overloading, since
+// partial template specialization of member functions is not allowed.
+struct CoulTag {};
+struct NoCoulTag {};
+
+template<int FLAG>
+struct DoCoul {
+  typedef NoCoulTag type;  // primary template: every FLAG other than 1
+};
+
+template<>
+struct DoCoul<1> {
+  typedef CoulTag type;  // FLAG == 1 maps to the coulomb tag
+};
+
+// Determine the memory traits of the force array.
+// The Atomic trait is added only for the HALFTHREAD neighbor list style.
+template<int NEIGHFLAG>
+struct AtomicF {
+  enum {value = Kokkos::Unmanaged};  // primary template: unmanaged only
+};
+
+template<>
+struct AtomicF<HALFTHREAD> {
+  enum {value = Kokkos::Atomic|Kokkos::Unmanaged};  // HALFTHREAD: atomic and unmanaged
+};
+
+//Specialisation for Neighborlist types Half, HalfThread, Full
 template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
 struct PairComputeFunctor  {
   typedef typename PairStyle::device_type device_type ;
+
+  // Reduction type, contains evdwl, ecoul and virial[6]
   typedef EV_FLOAT value_type;
 
+  // The copy of the pair style
   PairStyle c;
+
+  // The force array is atomic for Half/Thread neighbor style
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
+               device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
   NeighListKokkos<device_type> list;
 
   PairComputeFunctor(PairStyle* c_ptr,
                           NeighListKokkos<device_type>* list_ptr):
-  c(*c_ptr),list(*list_ptr) {};
+  c(*c_ptr),f(c.f),list(*list_ptr) {};
+
+  // Call cleanup_copy, which NULLs the copy's pointers to allocations that are destructed by the original PairStyle
   ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
 
   KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
     return j >> SBBITS & 3;
   }
 
+  // Loop over neighbors of one atom without coulomb interaction
+  // This function is called in parallel
   template<int EVFLAG, int NEWTON_PAIR>
   KOKKOS_FUNCTION
   EV_FLOAT compute_item(const int& ii,
-                        const NeighListKokkos<device_type> &list) const {
+                        const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
     EV_FLOAT ev;
     const int i = list.d_ilist[ii];
     const X_FLOAT xtmp = c.x(i,0);
@@ -77,25 +122,17 @@ struct PairComputeFunctor  {
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
-        if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
-          Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
-          Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
-          Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
-        }
 
-        if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {
-          c.f(j,0) -= delx*fpair;
-          c.f(j,1) -= dely*fpair;
-          c.f(j,2) -= delz*fpair;
+        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
+          f(j,0) -= delx*fpair;
+          f(j,1) -= dely*fpair;
+          f(j,2) -= delz*fpair;
         }
 
         if (EVFLAG) {
           if (c.eflag) {
             ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
               factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-            if (c.COUL_FLAG)
-              ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
-                factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
           }
 
           if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@@ -103,16 +140,84 @@ struct PairComputeFunctor  {
       }
 
     }
-    if (NEIGHFLAG == HALFTHREAD) {
-      Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
-      Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
-      Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
-    } else {
-      c.f(i,0) += fxtmp;
-      c.f(i,1) += fytmp;
-      c.f(i,2) += fztmp;
+
+    f(i,0) += fxtmp;
+    f(i,1) += fytmp;
+    f(i,2) += fztmp;
+
+    return ev;
+  }
+
+  // Loop over neighbors of one atom with coulomb interaction
+  // This function is called in parallel
+  template<int EVFLAG, int NEWTON_PAIR>
+  KOKKOS_FUNCTION
+  EV_FLOAT compute_item(const int& ii,
+                        const NeighListKokkos<device_type> &list, const CoulTag& ) const {
+    EV_FLOAT ev;
+    const int i = list.d_ilist[ii];
+    const X_FLOAT xtmp = c.x(i,0);
+    const X_FLOAT ytmp = c.x(i,1);
+    const X_FLOAT ztmp = c.x(i,2);
+    const int itype = c.type(i);
+    const F_FLOAT qtmp = c.q(i);
+
+    const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
+    const int jnum = list.d_numneigh[i];
+
+    F_FLOAT fxtmp = 0.0;
+    F_FLOAT fytmp = 0.0;
+    F_FLOAT fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = neighbors_i(jj);
+      const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
+      const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+      const X_FLOAT delx = xtmp - c.x(j,0);
+      const X_FLOAT dely = ytmp - c.x(j,1);
+      const X_FLOAT delz = ztmp - c.x(j,2);
+      const int jtype = c.type(j);
+      const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+      if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
+
+        F_FLOAT fpair = F_FLOAT();
+
+        if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
+          fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+        if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
+          fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+
+        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
+          f(j,0) -= delx*fpair;
+          f(j,1) -= dely*fpair;
+          f(j,2) -= delz*fpair;
+        }
+
+        if (EVFLAG) {
+          if (c.eflag) {
+            if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
+            ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
+              factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+            if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
+            ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
+              c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
+          }
+
+          if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
+        }
+      }
     }
 
+    f(i,0) += fxtmp;
+    f(i,1) += fytmp;
+    f(i,2) += fztmp;
+
     return ev;
   }
 
@@ -142,7 +247,7 @@ struct PairComputeFunctor  {
       const E_FLOAT v5 = dely*delz*fpair;
 
       if (c.vflag_global) {
-        if (NEIGHFLAG) {
+        if (NEIGHFLAG!=FULL) {
           if (NEWTON_PAIR) {
             ev.v[0] += v0;
             ev.v[1] += v1;
@@ -202,43 +307,17 @@ struct PairComputeFunctor  {
 
   KOKKOS_INLINE_FUNCTION
   void operator()(const int i) const {
-    if (c.newton_pair) compute_item<0,1>(i,list);
-    else compute_item<0,0>(i,list);
+    if (c.newton_pair) compute_item<0,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
+    else compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
 
   KOKKOS_INLINE_FUNCTION
   void operator()(const int i, value_type &energy_virial) const {
     if (c.newton_pair)
-      energy_virial += compute_item<1,1>(i,list);
+      energy_virial += compute_item<1,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
     else
-      energy_virial += compute_item<1,0>(i,list);
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  static void init(volatile value_type &update) {
-    update.evdwl = 0;
-    update.ecoul = 0;
-    update.v[0] = 0;
-    update.v[1] = 0;
-    update.v[2] = 0;
-    update.v[3] = 0;
-    update.v[4] = 0;
-    update.v[5] = 0;
+      energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
-  KOKKOS_INLINE_FUNCTION 
-  static void join(volatile value_type &update,
-                   const volatile value_type &source) {
-    update.evdwl += source.evdwl;
-    update.ecoul += source.ecoul;
-    update.v[0] += source.v[0];
-    update.v[1] += source.v[1];
-    update.v[2] += source.v[2];
-    update.v[3] += source.v[3];
-    update.v[4] += source.v[4];
-    update.v[5] += source.v[5];
-  }
-
-
 };
 
 template <class PairStyle, bool STACKPARAMS, class Specialisation>
@@ -261,8 +340,8 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation>  {
 
   template<int EVFLAG, int NEWTON_PAIR>
   KOKKOS_FUNCTION
-  EV_FLOAT compute_item(const device_type& dev,
-                        const NeighListKokkos<device_type> &list) const {
+  EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
+                        const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
     EV_FLOAT ev;
     const int i = vectorization::global_thread_rank(dev);
 
@@ -302,9 +381,6 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation>  {
             if (c.eflag) {
               ev.evdwl += 0.5*
                 factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-              if (c.COUL_FLAG)
-                ev.ecoul += 0.5*
-                  factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
             }
 
             if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@@ -373,44 +449,18 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation>  {
   }
 
   KOKKOS_INLINE_FUNCTION
-  void operator()(const device_type& dev) const {
-    if (c.newton_pair) compute_item<0,1>(dev,list);
-    else compute_item<0,0>(dev,list);
+  void operator()(const  typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
+    if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
+    else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
 
   KOKKOS_INLINE_FUNCTION
-  void operator()(const device_type& dev, value_type &energy_virial) const {
+  void operator()(const  typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
     if (c.newton_pair)
-      energy_virial += compute_item<1,1>(dev,list);
+      energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
     else
-      energy_virial += compute_item<1,0>(dev,list);
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  static void init(volatile value_type &update) {
-    update.evdwl = 0;
-    update.ecoul = 0;
-    update.v[0] = 0;
-    update.v[1] = 0;
-    update.v[2] = 0;
-    update.v[3] = 0;
-    update.v[4] = 0;
-    update.v[5] = 0;
-  }
-  KOKKOS_INLINE_FUNCTION
-  static void join(volatile value_type &update,
-                   const volatile value_type &source) {
-    update.evdwl += source.evdwl;
-    update.ecoul += source.ecoul;
-    update.v[0] += source.v[0];
-    update.v[1] += source.v[1];
-    update.v[2] += source.v[2];
-    update.v[3] += source.v[3];
-    update.v[4] += source.v[4];
-    update.v[5] += source.v[5];
+      energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
-
-
 };
 
 template <class PairStyle, bool STACKPARAMS, class Specialisation>
@@ -433,7 +483,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation>  {
   template<int EVFLAG, int NEWTON_PAIR>
   KOKKOS_FUNCTION
   EV_FLOAT compute_item(const int& ii,
-                        const NeighListKokkos<device_type> &list) const {
+                        const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
+    (void) list;
     EV_FLOAT ev;
     const int i = ii;//list.d_ilist[ii];
     const X_FLOAT xtmp = c.x(i,0);
@@ -470,9 +521,6 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation>  {
           if (c.eflag) {
             ev.evdwl += 0.5*
               factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-            if (c.COUL_FLAG)
-              ev.ecoul += 0.5*
-                factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
           }
 
           if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@@ -535,116 +583,156 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation>  {
 
   KOKKOS_INLINE_FUNCTION
   void operator()(const int i) const {
-    compute_item<0,0>(i,list);
+    compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
 
   KOKKOS_INLINE_FUNCTION
   void operator()(const int i, value_type &energy_virial) const {
-    energy_virial += compute_item<1,0>(i,list);
+    energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
   }
+};
 
-  KOKKOS_INLINE_FUNCTION
-  static void init(volatile value_type &update) {
-    update.evdwl = 0;
-    update.ecoul = 0;
-    update.v[0] = 0;
-    update.v[1] = 0;
-    update.v[2] = 0;
-    update.v[3] = 0;
-    update.v[4] = 0;
-    update.v[5] = 0;
-  }
-  KOKKOS_INLINE_FUNCTION
-  static void join(volatile value_type &update,
-                   const volatile value_type &source) {
-    update.evdwl += source.evdwl;
-    update.ecoul += source.ecoul;
-    update.v[0] += source.v[0];
-    update.v[1] += source.v[1];
-    update.v[2] += source.v[2];
-    update.v[3] += source.v[3];
-    update.v[4] += source.v[4];
-    update.v[5] += source.v[5];
-  }
+// Fallback overloads (pair_compute_neighlist here, pair_compute_fullcluster below)
+// for neighbor list flags that PairStyle does not support.
+// The type of the last parameter is wrapped in enable_if, so an overload is only
+// viable when its condition holds; a failed substitution is not an error.
+// This version tests !(NEIGHFLAG&PairStyle::EnabledNeighFlags) and the real one
+// further below tests the un-negated condition, so exactly one of the two matches.
+// The fallback launches no kernel: it prints a message and returns a default EV_FLOAT.
+template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(NEIGHFLAG&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
+  EV_FLOAT ev;  // returned untouched
+  (void) fpair;  // both parameters are intentionally unused
+  (void) list;
+  printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i  available %i",NEIGHFLAG,PairStyle::EnabledNeighFlags);
+  return ev;
+}
 
+template<class PairStyle, class Specialisation>  // fallback: viable only when FULLCLUSTER is not in PairStyle::EnabledNeighFlags
+EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(FULLCLUSTER&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
+  EV_FLOAT ev;  // returned untouched, no kernel is launched
+  (void) fpair;  // both parameters are intentionally unused
+  (void) list;
+  printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i  available %i",FULLCLUSTER,PairStyle::EnabledNeighFlags);
+  return ev;
+}
 
-};
+// Launch PairComputeFunctor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2; viable only when PairStyle::EnabledNeighFlags contains NEIGHFLAG
+template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<NEIGHFLAG&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
+  EV_FLOAT ev;
+  if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {  // more types than MAX_TYPES_STACKPARAMS: instantiate with STACKPARAMS=false
+    PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
+    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);  // energy or virial requested: reduce into ev
+    else                              Kokkos::parallel_for(list->inum,ff);  // otherwise a plain loop, ev stays default
+  } else {  // NOTE(review): the code this replaces used fpair->nlocal as the N2 range - confirm list->inum is valid for N2
+    PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
+    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+    else                              Kokkos::parallel_for(list->inum,ff);
+  }
+  return ev;
+}
 
+// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
 template<class PairStyle, class Specialisation>
-EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
+EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<FULLCLUSTER&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
   EV_FLOAT ev;
   if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
-    if (fpair->neighflag == FULL) {
-      PairComputeFunctor<PairStyle,FULL,false,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == HALF) {
-      PairComputeFunctor<PairStyle,HALF,false,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == N2) {
-      PairComputeFunctor<PairStyle,N2,false,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
-      else Kokkos::parallel_for(fpair->nlocal,ff);
-    } else if (fpair->neighflag == FULLCLUSTER) {
-      typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
-        f_type;
-      f_type ff(fpair, list);
-      #ifdef KOKKOS_HAVE_CUDA
-        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
-      #else
-        const int teamsize = 1;
-      #endif
-      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
-      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
-    }
+    typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
+      f_type;
+    f_type ff(fpair, list);
+    #ifdef KOKKOS_HAVE_CUDA
+      const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+    #else
+      const int teamsize = 1;
+    #endif
+    const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+    Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
+    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
+    else Kokkos::parallel_for(config,ff);
   } else {
-    if (fpair->neighflag == FULL) {
-      PairComputeFunctor<PairStyle,FULL,true,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == HALF) {
-      PairComputeFunctor<PairStyle,HALF,true,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
-    } else if (fpair->neighflag == N2) {
-      PairComputeFunctor<PairStyle,N2,true,Specialisation >
-        ff(fpair, list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
-      else Kokkos::parallel_for(fpair->nlocal,ff);
-    } else if (fpair->neighflag == FULLCLUSTER) {
-      typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
-        f_type;
-      f_type ff(fpair, list);
-      #ifdef KOKKOS_HAVE_CUDA
-        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
-      #else
-        const int teamsize = 1;
-      #endif
-      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
-      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
-    }
+    typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
+      f_type;
+    f_type ff(fpair, list);
+    #ifdef KOKKOS_HAVE_CUDA
+      const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+    #else
+      const int teamsize = 1;
+    #endif
+    const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+    Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
+    if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
+    else Kokkos::parallel_for(config,ff);
   }
   return ev;
 }
 
+
+// Entry point used by the pair styles: picks, from the runtime value of
+// fpair->neighflag, the launcher instantiated for that neighbor list flavor.
+// Any other flag value leaves the default-constructed EV_FLOAT untouched.
+template<class PairStyle, class Specialisation>
+EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
+  EV_FLOAT result;
+  switch (fpair->neighflag) {
+    case FULL:        result = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list); break;
+    case HALFTHREAD:  result = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list); break;
+    case HALF:        result = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list); break;
+    case N2:          result = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list); break;
+    case FULLCLUSTER: result = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list); break;
+    default:          break;
+  }
+  return result;
+}
+
+template<class DeviceType>
+struct PairVirialFDotRCompute {  // reduction functor: sums products of force and position components over atoms
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;  // type of the reduction result
+  typename AT::t_x_array_const x;  // positions, indexed (atom, component)
+  typename AT::t_f_array_const f;  // forces, indexed (atom, component)
+  const int offset;  // added to the loop index to obtain the atom index
+
+  PairVirialFDotRCompute(  typename AT::t_x_array_const x_,
+  typename AT::t_f_array_const f_,
+  const int offset_):x(x_),f(f_),offset(offset_) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int j, value_type &energy_virial) const {
+    const int i = j + offset;  // atom handled by this iteration
+    energy_virial.v[0] += f(i,0)*x(i,0);  // v[0..2]: same component of f and x
+    energy_virial.v[1] += f(i,1)*x(i,1);
+    energy_virial.v[2] += f(i,2)*x(i,2);
+    energy_virial.v[3] += f(i,1)*x(i,0);  // v[3..5]: f component times a lower-indexed x component
+    energy_virial.v[4] += f(i,2)*x(i,0);
+    energy_virial.v[5] += f(i,2)*x(i,1);
+  }
+};
+
+// Reduce PairVirialFDotRCompute over the atoms and store the six sums in fpair->virial.
+template<class PairStyle>
+void pair_virial_fdotr_compute(PairStyle* fpair) {
+  typedef PairVirialFDotRCompute<typename PairStyle::device_type> functor_t;
+  EV_FLOAT virial;
+  if (fpair->neighbor->includegroup == 0) {
+    // single pass over nlocal + nghost atoms
+    const int nall = fpair->atom->nlocal + fpair->atom->nghost;
+    Kokkos::parallel_reduce(nall,functor_t(fpair->x,fpair->f,0),virial);
+  } else {
+    // first nfirst atoms, then nghost atoms starting at index nlocal
+    Kokkos::parallel_reduce(fpair->atom->nfirst,functor_t(fpair->x,fpair->f,0),virial);
+    EV_FLOAT virial_ghost;
+    Kokkos::parallel_reduce(fpair->atom->nghost,functor_t(fpair->x,fpair->f,fpair->atom->nlocal),virial_ghost);
+    virial+=virial_ghost;
+  }
+  fpair->vflag_fdotr = 0;
+  // overwrite (not accumulate) the pair style's virial components
+  for (int n = 0; n < 6; n++) fpair->virial[n] = virial.v[n];
+}
+
+
+
+
 }
 
 #endif
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..97fa91a453b3b6b5d86e855725de729a9eb29744
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
@@ -0,0 +1,347 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_lj_cut_coul_cut_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutCoulCutKokkos<DeviceType>::PairLJCutCoulCutKokkos(LAMMPS *lmp):PairLJCutCoulCut(lmp)
+{
+  // the three squared-cutoff tables start out unset
+  cutsq = cut_ljsq = cut_coulsq = NULL;
+  respa_enable = 0;
+
+  // Kokkos bookkeeping: atom container, execution space and data masks
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutCoulCutKokkos<DeviceType>::~PairLJCutCoulCutKokkos()
+{
+  // nothing to release while the allocated flag is clear (cleanup_copy() clears it)
+  if (!allocated) return;
+  memory->destroy_kokkos(k_cutsq, cutsq);
+  memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
+  memory->destroy_kokkos(k_cut_coulsq, cut_coulsq);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulCutKokkos<DeviceType>::cleanup_copy() {
+  // Detach this copy from the arrays it shares with the object it was copied
+  // from, so that destroying the copy does not deallocate any of them.
+  allocated = 0;
+  cutsq = cut_ljsq = cut_coulsq = NULL;
+  // the per-atom energy and virial arrays are detached as well
+  eatom = NULL;
+  vatom = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
+
+  // set up the energy/virial accumulation, or clear the flags when neither
+  // energy nor virial is requested for this call
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_cut_ljsq.template sync<DeviceType>();
+  k_cut_coulsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  special_lj[0] = force->special_lj[0];
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+  special_coul[0] = force->special_coul[0];
+  special_coul[1] = force->special_coul[1];
+  special_coul[2] = force->special_coul[2];
+  special_coul[3] = force->special_coul[3];
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void >
+    (this,(NeighListKokkos<DeviceType>*)list);
+
+  DeviceType::fence();
+
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   LJ 12-6 pair force term for atoms i and j, already multiplied by 1/rsq
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  // coefficients come from the fixed-size table or from the device view
+  const F_FLOAT coef1 = STACKPARAMS ? m_params[itype][jtype].lj1 : params(itype,jtype).lj1;
+  const F_FLOAT coef2 = STACKPARAMS ? m_params[itype][jtype].lj2 : params(itype,jtype).lj2;
+
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+  const F_FLOAT forcelj = r6inv * (coef1*r6inv - coef2);
+
+  return forcelj*r2inv;
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair force between atoms i and j (qtmp is the charge paired with q(j))
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {  // i, itype, jtype are not used here
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT rinv = sqrt(r2inv);  // 1/r
+  F_FLOAT forcecoul;
+
+  forcecoul = qqrd2e*qtmp*q(j) *rinv;  // qqrd2e * qi * qj / r, stored as F_FLOAT
+
+  return factor_coul*forcecoul*r2inv;  // scaled by factor_coul and by 1/rsq
+}
+
+/* ----------------------------------------------------------------------
+   LJ 12-6 pair potential energy for atoms i and j, minus the stored offset
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  const F_FLOAT coef3 = STACKPARAMS ? m_params[itype][jtype].lj3 : params(itype,jtype).lj3;
+  const F_FLOAT coef4 = STACKPARAMS ? m_params[itype][jtype].lj4 : params(itype,jtype).lj4;
+  const F_FLOAT shift = STACKPARAMS ? m_params[itype][jtype].offset : params(itype,jtype).offset;
+
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+
+  return r6inv*(coef3*r6inv - coef4) - shift;
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair potential energy between atoms i and j (qtmp is the charge paired with q(j))
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {  // i, itype, jtype are not used here
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT rinv = sqrt(r2inv);  // 1/r
+
+  return factor_coul*qqrd2e*qtmp*q(j)*rinv;  // factor_coul * qqrd2e * qi * qj / r
+
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays: base-class allocation first, then Kokkos-backed cutoff tables
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulCutKokkos<DeviceType>::allocate()
+{
+  PairLJCutCoulCut::allocate();
+
+  int n = atom->ntypes;  // tables below are sized (n+1) x (n+1)
+  memory->destroy(cutsq);  // drop the existing cutsq before creating the dual-view version
+  memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  memory->destroy(cut_ljsq);  // same replacement for cut_ljsq
+  memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+  memory->destroy(cut_coulsq);  // and for cut_coulsq
+  memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq");
+  d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
+  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulCut::params",n+1,n+1);
+  params = k_params.d_view;  // const device-side view read by the compute_* functions
+}
+
+/* ----------------------------------------------------------------------
+   global settings: at most two arguments, all handed to the base class
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  if (narg > 2) error->all(FLERR,"Illegal pair_style command");
+  // pass the real count on: a fixed 1 would silently drop an accepted 2nd argument
+  PairLJCutCoulCut::settings(narg,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style:
+   reject rRESPA inner/middle levels, then configure the neighbor request
+   made by the parent class for the globally selected neighflag
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulCutKokkos<DeviceType>::init_style()
+{
+  PairLJCutCoulCut::init_style();
+
+  // error if rRESPA with inner levels
+
+  if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
+    int respa = 0;
+    if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
+    if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    if (respa)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // irequest = neigh request made by parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;
+
+  neighbor->requests[irequest]->
+    kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
+    !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  neighbor->requests[irequest]->
+    kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+
+  // Only the flags listed in EnabledNeighFlags (FULL, HALF, HALFTHREAD) have
+  // an enabled launcher for this style. For any other flag pair_compute()
+  // resolves to the fallback overload, which launches no kernel, so such a
+  // request is rejected here instead of silently running without forces.
+
+  if (neighflag == FULL) {
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else if (neighflag == HALF || neighflag == HALFTHREAD) {
+    neighbor->requests[irequest]->full = 0;
+    neighbor->requests[irequest]->half = 1;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else {
+    // includes N2 and FULLCLUSTER, which are not part of EnabledNeighFlags
+    error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i; returns the base-class cutoff
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairLJCutCoulCut::init_one(i,j);  // base class runs first; the tables read below are taken after this call
+  double cut_ljsqm = cut_ljsq[i][j];
+  double cut_coulsqm = cut_coulsq[i][j];
+
+  k_params.h_view(i,j).lj1 = lj1[i][j];  // copy the pair coefficients into the host side of the dual view
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
+  k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
+
+  k_params.h_view(j,i) = k_params.h_view(i,j);  // mirror entry for the swapped pair
+  if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {  // indices fit the fixed-size m_* tables
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
+  }
+  k_cutsq.h_view(i,j) = cutone*cutone;  // only (i,j) is written here
+  k_cutsq.template modify<LMPHostType>();  // flag the host copy as modified
+  k_cut_ljsq.h_view(i,j) = cut_ljsqm;
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_cut_coulsq.h_view(i,j) = cut_coulsqm;
+  k_cut_coulsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+
+
+template class PairLJCutCoulCutKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class PairLJCutCoulCutKokkos<LMPHostType>;
+#endif
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc91a5989aca913bed237bbaa0c18fc6783a66fd
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/cut/kk,PairLJCutCoulCutKokkos<LMPDeviceType>)
+PairStyle(lj/cut/coul/cut/kk/device,PairLJCutCoulCutKokkos<LMPDeviceType>)
+PairStyle(lj/cut/coul/cut/kk/host,PairLJCutCoulCutKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
+#define LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_cut_coul_cut.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {  // Kokkos variant of PairLJCutCoulCut, templated on the device type
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};  // neighbor list flags for which the pair_compute launchers are enabled
+  enum {COUL_FLAG=1};  // read by the compute functor through DoCoul<PairStyle::COUL_FLAG>
+  typedef DeviceType device_type;
+  PairLJCutCoulCutKokkos(class LAMMPS *);
+  ~PairLJCutCoulCutKokkos();
+
+  void compute(int, int);
+
+  void settings(int, char **);
+  void init_style();
+  double init_one(int, int);
+
+  struct params_lj_coul{  // coefficients stored per pair of atom types
+    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
+    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};  // the int argument is ignored
+    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
+  };
+
+ protected:
+  void cleanup_copy();
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const params;
+  // fixed-size copy of the parameters, used when STACKPARAMS is true (types up to MAX_TYPES_STACKPARAMS)
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;  // per-atom views, re-acquired in every compute() call
+  typename ArrayTypes<DeviceType>::t_x_array c_x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+  typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+  int newton_pair;
+
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;  // dual views and their DeviceType-side views, created in allocate()
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
+
+  class AtomKokkos *atomKK;
+  int neighflag;  // set in init_style() from lmp->kokkos->neighflag
+  int nlocal,nall,eflag,vflag;
+
+  double special_coul[4];  // copies of the force-> values, refreshed in compute()
+  double special_lj[4];
+  double qqrd2e;
+
+  void allocate();
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>;  // friends exist only for the flags in EnabledNeighFlags
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>;
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>;
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>;
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>;
+  friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5c4b834bee41d13dccf85ba01427cab6f563c486
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
@@ -0,0 +1,496 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_lj_cut_coul_long_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJCutCoulLong(lmp)
+{
+  respa_enable = 0;  // init_style() rejects rRESPA inner/middle levels for this style
+  // Kokkos bookkeeping: execution space plus the atom fields read / written
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  cutsq = NULL;
+  cut_ljsq = NULL;
+  // cut_coulsq is a scalar (init_one copies it into a double), so zero it as a number
+  cut_coulsq = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos()
+{
+  // release the dual views only while `allocated` is set (cleanup_copy() clears it)
+  if (!allocated) return;
+  memory->destroy_kokkos(k_cutsq, cutsq);
+  memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::cleanup_copy() {
+  // drop every array reference held by this copy so that it cannot deallocate them
+  allocated = 0;
+  ftable = NULL;
+  eatom = NULL;
+  vatom = NULL;
+  cut_ljsq = NULL;
+  cutsq = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  // Force driver: sync inputs to the execution space, run the shared
+  // pair_compute routine, then accumulate global energy and virial.
+  // eflag_in / vflag_in: energy / virial flags, forwarded to ev_setup().
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // make atom data and the per-type dual views current in the execution space
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_cut_ljsq.template sync<DeviceType>();
+  k_cut_coulsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  // mark what gets written: forces always, energy/virial only when requested
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  // cache the views and scalars read during the pair computation
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  for (int n = 0; n < 4; n++) {
+    special_lj[n] = force->special_lj[n];
+    special_coul[n] = force->special_coul[n];
+  }
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms; ncoultablebits picks the CoulLongTable<> variant
+
+  EV_FLOAT ev;
+  if(ncoultablebits)
+    ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<1> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+  else
+    ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<0> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+
+  DeviceType::fence();
+
+  // add the totals returned by pair_compute to the pair-style accumulators
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+}
+
+/* ----------------------------------------------------------------------
+   LJ 12-6 force term for the pair i,j, already divided by r^2
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  // coefficients come from the fixed-size member array when STACKPARAMS is
+  // set, otherwise from the params view
+  const F_FLOAT c1 = STACKPARAMS ? m_params[itype][jtype].lj1 : params(itype,jtype).lj1;
+  const F_FLOAT c2 = STACKPARAMS ? m_params[itype][jtype].lj2 : params(itype,jtype).lj2;
+
+  const F_FLOAT inv_r2 = 1.0/rsq;
+  const F_FLOAT inv_r6 = inv_r2*inv_r2*inv_r2;
+
+  return inv_r6*(c1*inv_r6 - c2)*inv_r2;
+}
+
+/* ----------------------------------------------------------------------
+   coulomb force term for the pair i,j, already divided by r^2
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS,  class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  if(Specialisation::DoTable && rsq > tabinnersq) {
+    // tabulated branch: bits of rsq (via union_int_float_t) pick the table bin
+    union_int_float_t lookup;
+    lookup.f = rsq;
+    const int bin = (lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT frac = (lookup.f - d_rtable[bin]) * d_drtable[bin];
+    F_FLOAT fc = qtmp*q[j] * (d_ftable[bin] + frac*d_dftable[bin]);
+    if (factor_coul < 1.0) {
+      // special pair: take back the (1-factor_coul) share using ctable
+      const F_FLOAT pre = qtmp*q[j] * (d_ctable[bin] + frac*d_dctable[bin]);
+      fc -= (1.0-factor_coul)*pre;
+    }
+    return fc/rsq;
+  } else {
+    // analytic branch: polynomial approximation of erfc(g_ewald*r)
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT rinv = 1.0/r;
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc_approx = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT pre = qqrd2e * qtmp*q[j]*rinv;
+    F_FLOAT fc = pre * (erfc_approx + EWALD_F*grij*expm2);
+    if (factor_coul < 1.0) fc -= (1.0-factor_coul)*pre;
+    return fc*rinv*rinv;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   LJ 12-6 pair energy for the pair i,j, shifted by the per-type offset
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  // STACKPARAMS selects the fixed-size member array over the params view
+  const F_FLOAT c3 = STACKPARAMS ? m_params[itype][jtype].lj3 : params(itype,jtype).lj3;
+  const F_FLOAT c4 = STACKPARAMS ? m_params[itype][jtype].lj4 : params(itype,jtype).lj4;
+  const F_FLOAT shift = STACKPARAMS ? m_params[itype][jtype].offset : params(itype,jtype).offset;
+
+  const F_FLOAT inv_r2 = 1.0/rsq;
+  const F_FLOAT inv_r6 = inv_r2*inv_r2*inv_r2;
+  return inv_r6*(c3*inv_r6 - c4) - shift;
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  // use the table only for rsq > tabinnersq, matching compute_fcoul; else analytic
+  if(Specialisation::DoTable && rsq > tabinnersq) {
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
+    F_FLOAT ecoul = qtmp*q[j] * (d_etable[itable] + fraction*d_detable[itable]);
+    if (factor_coul < 1.0) {
+      // special pair: subtract the excluded share, interpolated from ctable
+      const F_FLOAT prefactor = qtmp*q[j] * (d_ctable[itable] + fraction*d_dctable[itable]);
+      ecoul -= (1.0-factor_coul)*prefactor;
+    }
+    return ecoul;
+  } else {
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
+    F_FLOAT ecoul = prefactor * erfc;
+    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+    return ecoul;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays; per-type cutoff tables are created as dual views
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::allocate()
+{
+  PairLJCutCoulLong::allocate();
+
+  const int dim = atom->ntypes + 1;
+  // replace cutsq / cut_ljsq with arrays created together with their dual views
+  memory->destroy(cutsq);
+  memory->create_kokkos(k_cutsq,cutsq,dim,dim,"pair:cutsq");
+  memory->destroy(cut_ljsq);
+  memory->create_kokkos(k_cut_ljsq,cut_ljsq,dim,dim,"pair:cut_ljsq");
+  memory->create_kokkos(k_cut_coulsq,dim,dim,"pair:cut_coulsq");
+  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulLong::params",dim,dim);
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+  d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
+  params = k_params.d_view;
+}
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
+{
+  Pair::init_tables(cut_coul,cut_respa);
+  // after the base-class setup, copy each host lookup table into a DeviceType view
+  typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
+  typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
+
+  int ntable = 1;
+  for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
+  // ntable = 2^ncoultablebits entries per table; every block below stages one
+  // table in a host view and deep-copies it into the matching d_* member
+  // Copy rtable and drtable
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = rtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_rtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = drtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_drtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ftable and dftable
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = ftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = dftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ctable and dctable
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = ctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = dctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy etable and detable
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = etable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_etable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for(int i = 0; i < ntable; i++) {
+    h_table(i) = detable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_detable = d_table;
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   global settings: at most two arguments, parsed by the base class
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  const bool too_many_args = narg > 2;
+  if (too_many_args) error->all(FLERR,"Illegal pair_style command");
+  PairLJCutCoulLong::settings(narg,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutCoulLongKokkos<DeviceType>::init_style()
+{
+  PairLJCutCoulLong::init_style();
+
+  // rRESPA is only accepted without inner/middle levels
+
+  if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
+    Respa *integrator = (Respa *) update->integrate;
+    const bool split_levels =
+      integrator->level_inner >= 0 || integrator->level_middle >= 0;
+    if (split_levels)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // the neigh request made by the parent class is the last entry;
+  // tag it with the Kokkos host/device flags for this DeviceType
+
+  neighflag = lmp->kokkos->neighflag;
+  const int irequest = neighbor->nrequest - 1;
+  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+
+  neighbor->requests[irequest]->kokkos_host = on_host && !on_device;
+  neighbor->requests[irequest]->kokkos_device = on_device;
+
+  // translate neighflag into the full/half flags of that request
+
+  const bool want_full = (neighflag == FULL);
+  const bool want_half = (neighflag == HALF || neighflag == HALFTHREAD);
+  if (want_full || want_half) {
+    neighbor->requests[irequest]->full = want_full ? 1 : 0;
+    neighbor->requests[irequest]->half = want_full ? 0 : 1;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else {
+    error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i; returns the base-class cutoff
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairLJCutCoulLong::init_one(i,j);
+  double cut_ljsqm = cut_ljsq[i][j];
+  double cut_coulsqm = cut_coulsq;
+  // host copy of the coefficients for (i,j)
+  k_params.h_view(i,j).lj1 = lj1[i][j];
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
+  k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
+  // mirror to (j,i); fill the fixed-size member copies when both types fit
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
+  }
+  // store the squared cutoffs symmetrically, like k_params and the m_* arrays above
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
+  k_cutsq.template modify<LMPHostType>();
+  k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm;
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_cut_coulsq.h_view(i,j) = k_cut_coulsq.h_view(j,i) = cut_coulsqm;
+  k_cut_coulsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+
+// explicit instantiations; the LMPHostType one is only emitted when KOKKOS_HAVE_CUDA is defined
+template class PairLJCutCoulLongKokkos<LMPDeviceType>;
+#ifdef KOKKOS_HAVE_CUDA
+template class PairLJCutCoulLongKokkos<LMPHostType>;
+#endif
diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
new file mode 100644
index 0000000000000000000000000000000000000000..d6512174f14b6319b98f31d107a5326a90e5c885
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
@@ -0,0 +1,147 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/long/kk,PairLJCutCoulLongKokkos<LMPDeviceType>)
+PairStyle(lj/cut/coul/long/kk/device,PairLJCutCoulLongKokkos<LMPDeviceType>)
+PairStyle(lj/cut/coul/long/kk/host,PairLJCutCoulLongKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
+#define LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_cut_coul_long.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+// Kokkos variant of PairLJCutCoulLong, templated on the DeviceType its views live on.
+template<class DeviceType>
+class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};  // neighbor list styles accepted by init_style()
+  enum {COUL_FLAG=1};  // NOTE(review): presumably enables the Coulomb terms in pair_compute - confirm in pair_kokkos.h
+  typedef DeviceType device_type;
+  PairLJCutCoulLongKokkos(class LAMMPS *);
+  ~PairLJCutCoulLongKokkos();
+
+  void compute(int, int);
+
+  void settings(int, char **);
+  void init_tables(double cut_coul, double *cut_respa);
+  void init_style();
+  double init_one(int, int);
+  // coefficients for one type pair, as stored in k_params / m_params
+  struct params_lj_coul{
+    params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
+    params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
+    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
+  };
+
+ protected:
+  void cleanup_copy();
+  // per-pair force/energy terms; STACKPARAMS selects m_params over the params view
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const params;
+  // fixed-size copies filled by init_one for types within MAX_TYPES_STACKPARAMS
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
+  typename ArrayTypes<DeviceType>::t_x_array c_x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+  typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+  int newton_pair;
+
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
+  // Coulomb lookup tables, filled by init_tables()
+  typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread
+    d_rtable, d_drtable, d_ftable, d_dftable,
+    d_ctable, d_dctable, d_etable, d_detable;
+  class AtomKokkos *atomKK;
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  double special_coul[4];
+  double special_lj[4];
+  double qqrd2e;
+
+  void allocate();
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*);
+  // the friend declarations above give the generic pair routines access to the protected members
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp
index 94576a36c777b3dfafb17d4d6b497204f162f896..c69fbe89f51b75b26d379fd8cdd663f03a1ba4a8 100644
--- a/src/KOKKOS/pair_lj_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp
@@ -99,18 +99,18 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
   c_x = atomKK->k_x.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
   nlocal = atom->nlocal;
   nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
   special_lj[0] = force->special_lj[0];
   special_lj[1] = force->special_lj[1];
   special_lj[2] = force->special_lj[2];
   special_lj[3] = force->special_lj[3];
-  newton_pair = force->newton_pair;
 
   // loop over neighbors of my atoms
 
   EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
-
   DeviceType::fence();
 
   if (eflag) eng_vdwl += ev.evdwl;
@@ -123,7 +123,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     virial[5] += ev.v[5];
   }
 
-  if (vflag_fdotr) virial_fdotr_compute();
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
 }
 
 template<class DeviceType>
@@ -131,12 +131,15 @@ template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairLJCutKokkos<DeviceType>::
 compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
   const F_FLOAT r2inv = 1.0/rsq;
   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
 
   const F_FLOAT forcelj = r6inv *
     ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
      (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
+
   return forcelj*r2inv;
 }
 
@@ -145,8 +148,11 @@ template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairLJCutKokkos<DeviceType>::
 compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
   const F_FLOAT r2inv = 1.0/rsq;
   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+
   return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
                 (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
                 (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
@@ -262,6 +268,6 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
 
 
 template class PairLJCutKokkos<LMPDeviceType>;
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template class PairLJCutKokkos<LMPHostType>;
 #endif
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h
index 5c3c002af53cf84afeba4659bb41583f7d20f29b..0ba40b4d543fdb567937347ec67c863c69dd4800 100644
--- a/src/KOKKOS/pair_lj_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_kokkos.h
@@ -31,6 +31,7 @@ namespace LAMMPS_NS {
 template<class DeviceType>
 class PairLJCutKokkos : public PairLJCut {
  public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
   enum {COUL_FLAG=0};
   typedef DeviceType device_type;
   PairLJCutKokkos(class LAMMPS *);
@@ -76,9 +77,10 @@ class PairLJCutKokkos : public PairLJCut {
   typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
   typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
   typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
-  //typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
+  typename ArrayTypes<DeviceType>::t_tagint_1d tag;
 
   int newton_pair;
+  double special_lj[4];
 
   typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
   typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
@@ -98,8 +100,13 @@ class PairLJCutKokkos : public PairLJCut {
   friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
   friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
   friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-
+  friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
 };
 
 }
diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp
index cc8072991aab055b67885e54c8f20af736c68b67..b4cc32adf5af2990758805c9433c191ae1ae4df7 100644
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@@ -153,8 +153,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
         const int teamsize = 1;
       #endif
       const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
-      if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
-      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
+      Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
+      if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
+      else Kokkos::parallel_for(config,f);
     }
   } else {
     if (neighflag == FULL) {
@@ -187,8 +188,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
         const int teamsize = 1;
       #endif
       const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
-      if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
-      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
+      Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
+      if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
+      else Kokkos::parallel_for(config,f);
     }
   }
   DeviceType::fence();
@@ -203,7 +205,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
     virial[5] += ev.v[5];
   }
 
-  if (vflag_fdotr) virial_fdotr_compute();
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
 }
 
 template<class DeviceType>
@@ -211,6 +213,8 @@ template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairTableKokkos<DeviceType>::
 compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
   union_int_float_t rsq_lookup;
   double fpair;
   const int tidx = d_table_const.tabindex(itype,jtype);
@@ -254,6 +258,8 @@ template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairTableKokkos<DeviceType>::
 compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
   double evdwl;
   union_int_float_t rsq_lookup;
   const int tidx = d_table_const.tabindex(itype,jtype);
@@ -292,128 +298,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c
   return evdwl;
 }
 
-/*
-template<class DeviceType>
-template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR,int TABSTYLE>
-KOKKOS_FUNCTION
-EV_FLOAT PairTableKokkos<DeviceType>::
-compute_item(const int &ii, const NeighListKokkos<DeviceType> &list) const
-{
-  EV_FLOAT ev;
-  const int tlm1 = tablength - 1;
-  union_int_float_t rsq_lookup;
-  const int i = list.d_ilist[ii];
-  const X_FLOAT xtmp = x(i,0);
-  const X_FLOAT ytmp = x(i,1);
-  const X_FLOAT ztmp = x(i,2);
-  const int itype = type(i);
-
-  const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
-  const int jnum = list.d_numneigh[i];
-
-  F_FLOAT fxtmp = 0.0;
-  F_FLOAT fytmp = 0.0;
-  F_FLOAT fztmp = 0.0;
-
-  for (int jj = 0; jj < jnum; jj++) {
-    int j = neighbors_i(jj);
-    const F_FLOAT factor_lj = 1.0;  //special_lj[sbmask(j)];
-    j &= NEIGHMASK;
-    const X_FLOAT delx = xtmp - x(j,0);
-    const X_FLOAT dely = ytmp - x(j,1);
-    const X_FLOAT delz = ztmp - x(j,2);
-    const int jtype = type(j);
-    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq < d_table_const.cutsq(itype,jtype)) {
-        double fpair; 
-        const int tidx = d_table_const.tabindex(itype,jtype);
-        //const Table* const tb = &tables[tabindex[itype][jtype]];
-        
-        //if (rsq < d_table_const.innersq(tidx))
-        //  error->one(FLERR,"Pair distance < table inner cutoff");
-
-        if (TABSTYLE == LOOKUP) {
-          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
-          //if (itable >= tlm1)
-          //  error->one(FLERR,"Pair distance > table outer cutoff");
-          fpair = factor_lj * d_table_const.f(tidx,itable);
-          if (EVFLAG)
-            ev.evdwl = d_table_const.e(tidx,itable);
-        } else if (TABSTYLE == LINEAR) {
-          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
-          //if (itable >= tlm1)
-          //  error->one(FLERR,"Pair distance > table outer cutoff");
-          const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
-          const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);          
-          fpair = factor_lj * value;
-          if (EVFLAG)
-            ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
-        } else if (TABSTYLE == SPLINE) {
-          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
-          //if (itable >= tlm1)
-          //  error->one(FLERR,"Pair distance > table outer cutoff");
-          const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
-          const double a = 1.0 - b;
-          const double value = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) +
-            ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) *
-            d_table_const.deltasq6(tidx);
-          fpair = factor_lj * value;
-          if (EVFLAG)
-            ev.evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) +
-              ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) *
-              d_table_const.deltasq6(tidx);
-        } else {
-          rsq_lookup.f = rsq;
-          int itable = rsq_lookup.i & d_table_const.nmask(tidx);
-          itable >>= d_table_const.nshiftbits(tidx);
-          const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable);
-          const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
-          fpair = factor_lj * value;
-          if (EVFLAG)
-            ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
-        }
-
-      fxtmp += delx*fpair;
-      fytmp += dely*fpair;
-      fztmp += delz*fpair;
-      if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
-        Kokkos::atomic_fetch_add(&f(j,0),-delx*fpair);
-        Kokkos::atomic_fetch_add(&f(j,1),-dely*fpair);
-        Kokkos::atomic_fetch_add(&f(j,2),-delz*fpair);
-      }
-
-      if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < nlocal)) {
-        f(j,0) -= delx*fpair;
-        f(j,1) -= dely*fpair;
-        f(j,2) -= delz*fpair;
-      }
-
-      if(EVFLAG) {
-        if (eflag) {
-          ev.evdwl *= factor_lj;
-        }
-
-        if (evflag) ev_tally<NEIGHFLAG>(ev,i,j
-,fpair,delx,dely,delz);
-      }  
-    }
-  }
-
-  if (NEIGHFLAG == HALFTHREAD) {
-    Kokkos::atomic_fetch_add(&f(i,0),fxtmp);
-    Kokkos::atomic_fetch_add(&f(i,1),fytmp);
-    Kokkos::atomic_fetch_add(&f(i,2),fztmp);
-  } else {
-    f(i,0) += fxtmp;
-    f(i,1) += fytmp;
-    f(i,2) += fztmp;
-  }
-
-  return ev;
-}
-*/
-
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::create_kokkos_tables()
 {
@@ -880,7 +764,6 @@ void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line)
       word = strtok(NULL," \t\n\r\f");
       tb->fphi = atof(word);
     } else {
-      printf("WORD: %s\n",word);
       error->one(FLERR,"Invalid keyword in pair table parameters");
     }
     word = strtok(NULL," \t\n\r\f");
@@ -1494,7 +1377,7 @@ void PairTableKokkos<DeviceType>::cleanup_copy() {
 }
 
 template class PairTableKokkos<LMPDeviceType>;
-#if DEVICE==2
+#ifdef KOKKOS_HAVE_CUDA
 template class PairTableKokkos<LMPHostType>;
 #endif
 
diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h
index 317703c8955bf36db3c7f08c580c54066761a1eb..10c7c6db7baf28fb6012070b22b589f53445fafc 100644
--- a/src/KOKKOS/pair_table_kokkos.h
+++ b/src/KOKKOS/pair_table_kokkos.h
@@ -41,6 +41,7 @@ template<class DeviceType>
 class PairTableKokkos : public Pair {
  public:
 
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};  // NOTE(review): OR builds the mask; AND of distinct flags is 0 - assumes the neighbor constants are distinct bits, confirm
   enum {COUL_FLAG=0};
   typedef DeviceType device_type;
 
@@ -208,67 +209,10 @@ class PairTableKokkos : public Pair {
   friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
   friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
   friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
-/*template<int FULL_NEIGH>
-  KOKKOS_INLINE_FUNCTION
-    void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
-                  const F_FLOAT &fpair, const F_FLOAT &delx,
-                  const F_FLOAT &dely, const F_FLOAT &delz) const;
-*/
-};
-/*
-template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
-struct PairTableComputeFunctor  {
-  typedef DeviceType device_type ;
-  typedef EV_FLOAT value_type;
-
-  PairTableKokkos<DeviceType> c;
-  NeighListKokkos<DeviceType> list;
-
-  PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
-                          NeighListKokkos<DeviceType>* list_ptr):
-  c(*c_ptr),list(*list_ptr) {};
-  ~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(const int i) const {
-    if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
-    else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
-  }
 
-  KOKKOS_INLINE_FUNCTION
-  void operator()(const int i, value_type &energy_virial) const {
-    if (c.newton_pair)
-      energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
-    else
-      energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  static void init(volatile value_type &update) {
-    update.evdwl = 0;
-    update.ecoul = 0;
-    update.v[0] = 0;
-    update.v[1] = 0;
-    update.v[2] = 0;
-    update.v[3] = 0;
-    update.v[4] = 0;
-    update.v[5] = 0;
-  }
-  KOKKOS_INLINE_FUNCTION
-  static void join(volatile value_type &update,
-                   const volatile value_type &source) {
-    update.evdwl += source.evdwl;
-    update.ecoul += source.ecoul;
-    update.v[0] += source.v[0];
-    update.v[1] += source.v[1];
-    update.v[2] += source.v[2];
-    update.v[3] += source.v[3];
-    update.v[4] += source.v[4];
-    update.v[5] += source.v[5];
-  }
+  friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
 };
 
-*/
 
 
 
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 5f5d1542bd3c0090286923a35bee1d2676ccde95..1e7475dc49995edf83390d4cad32648baeb68380 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -53,6 +53,7 @@ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
 
 void VerletKokkos::setup()
 {
+
   if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
   update->setupflag = 1;
 
@@ -65,19 +66,24 @@ void VerletKokkos::setup()
   atomKK->setup();
   modify->setup_pre_exchange();
       // debug
-      atomKK->sync(Host,ALL_MASK);
-      atomKK->modified(Host,ALL_MASK);
+  atomKK->sync(Host,ALL_MASK);
+  atomKK->modified(Host,ALL_MASK);
   if (triclinic) domain->x2lamda(atomKK->nlocal);
   domain->pbc();
 
   atomKK->sync(Host,ALL_MASK);
 
+
   domain->reset_box();
   comm->setup();
   if (neighbor->style) neighbor->setup_bins();
+
   comm->exchange();
+
   if (atomKK->sortfreq > 0) atomKK->sort();
+
   comm->borders();
+
   if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
 
   atomKK->sync(Host,ALL_MASK);
@@ -97,20 +103,47 @@ void VerletKokkos::setup()
   force_clear();
   modify->setup_pre_force(vflag);
 
-  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  if (pair_compute_flag) {
+    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+    force->pair->compute(eflag,vflag);
+    timer->stamp(TIME_PAIR);
+  }
   else if (force->pair) force->pair->compute_dummy(eflag,vflag);
 
+
   if (atomKK->molecular) {
-    if (force->bond) force->bond->compute(eflag,vflag);
-    if (force->angle) force->angle->compute(eflag,vflag);
-    if (force->dihedral) force->dihedral->compute(eflag,vflag);
-    if (force->improper) force->improper->compute(eflag,vflag);
+    if (force->bond) {
+      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+      force->bond->compute(eflag,vflag);
+    }
+    if (force->angle) {
+      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+      force->angle->compute(eflag,vflag);
+    }
+    if (force->dihedral) {
+      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+      force->dihedral->compute(eflag,vflag);
+    }
+    if (force->improper) {
+      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+      force->improper->compute(eflag,vflag);
+    }
+    timer->stamp(TIME_BOND);
   }
 
-  if (force->kspace) {
+  if(force->kspace) {
     force->kspace->setup();
-    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
-    else force->kspace->compute_dummy(eflag,vflag);
+    if (kspace_compute_flag) {
+      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+      force->kspace->compute(eflag,vflag);
+      timer->stamp(TIME_KSPACE);
+    } else force->kspace->compute_dummy(eflag,vflag);
   }
 
   if (force->newton) comm->reverse_comm();
@@ -172,20 +205,47 @@ void VerletKokkos::setup_minimal(int flag)
   force_clear();
   modify->setup_pre_force(vflag);
 
-  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  if (pair_compute_flag) {
+    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+    force->pair->compute(eflag,vflag);
+    timer->stamp(TIME_PAIR);
+  }
   else if (force->pair) force->pair->compute_dummy(eflag,vflag);
 
+
   if (atomKK->molecular) {
-    if (force->bond) force->bond->compute(eflag,vflag);
-    if (force->angle) force->angle->compute(eflag,vflag);
-    if (force->dihedral) force->dihedral->compute(eflag,vflag);
-    if (force->improper) force->improper->compute(eflag,vflag);
+    if (force->bond) {
+      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+      force->bond->compute(eflag,vflag);
+    }
+    if (force->angle) {
+      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+      force->angle->compute(eflag,vflag);
+    }
+    if (force->dihedral) {
+      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+      force->dihedral->compute(eflag,vflag);
+    }
+    if (force->improper) {
+      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+      force->improper->compute(eflag,vflag);
+    }
+    timer->stamp(TIME_BOND);
   }
 
-  if (force->kspace) {
+  if(force->kspace) {
     force->kspace->setup();
-    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
-    else force->kspace->compute_dummy(eflag,vflag);
+    if (kspace_compute_flag) {
+      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+      force->kspace->compute(eflag,vflag);
+      timer->stamp(TIME_KSPACE);
+    } else force->kspace->compute_dummy(eflag,vflag);
   }
 
   if (force->newton) comm->reverse_comm();
@@ -291,31 +351,47 @@ void VerletKokkos::run(int n)
     timer->stamp();
 
     if (pair_compute_flag) {
+      atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+      atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
       force->pair->compute(eflag,vflag);
       timer->stamp(TIME_PAIR);
     }
 
     if (atomKK->molecular) {
-      if (force->bond) force->bond->compute(eflag,vflag);
-      if (force->angle) force->angle->compute(eflag,vflag);
-      if (force->dihedral) force->dihedral->compute(eflag,vflag);
-      if (force->improper) force->improper->compute(eflag,vflag);
+      if (force->bond) {
+        atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+        atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+        force->bond->compute(eflag,vflag);
+      }
+      if (force->angle) {
+        atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+        atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+        force->angle->compute(eflag,vflag);
+      }
+      if (force->dihedral) {
+        atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+        atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+        force->dihedral->compute(eflag,vflag);
+      }
+      if (force->improper) {
+        atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+        atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+        force->improper->compute(eflag,vflag);
+      }
       timer->stamp(TIME_BOND);
     }
 
     if (kspace_compute_flag) {
+      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
       force->kspace->compute(eflag,vflag);
       timer->stamp(TIME_KSPACE);
     }
 
     // reverse communication of forces
 
-    if (force->newton) {
-      atomKK->sync(Host,F_MASK);
-      comm->reverse_comm();
-      atomKK->modified(Host,F_MASK);
-      timer->stamp(TIME_COMM);
-    }
+    if (force->newton) comm->reverse_comm();
+    timer->stamp(TIME_COMM);
 
     // force modifications, final time integration, diagnostics