From 3545d44d0d160a3c8e111741c54e2edf20c26e9a Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Thu, 29 May 2014 22:51:58 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12040
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/KOKKOS/Install.sh                 |   71 ++
 src/KOKKOS/atom_kokkos.cpp            |  190 ++++
 src/KOKKOS/atom_kokkos.h              |   86 ++
 src/KOKKOS/atom_vec_atomic_kokkos.cpp | 1371 ++++++++++++++++++++++
 src/KOKKOS/atom_vec_atomic_kokkos.h   |  111 ++
 src/KOKKOS/atom_vec_kokkos.cpp        |   23 +
 src/KOKKOS/atom_vec_kokkos.h          |   76 ++
 src/KOKKOS/comm_kokkos.cpp            |  820 ++++++++++++++
 src/KOKKOS/comm_kokkos.h              |   63 ++
 src/KOKKOS/domain_kokkos.cpp          |  207 ++++
 src/KOKKOS/domain_kokkos.h            |   38 +
 src/KOKKOS/fix_nve_kokkos.cpp         |  177 +++
 src/KOKKOS/fix_nve_kokkos.h           |  110 ++
 src/KOKKOS/kokkos.cpp                 |  220 ++++
 src/KOKKOS/kokkos.h                   |   40 +
 src/KOKKOS/kokkos_type.h              |  617 ++++++++++
 src/KOKKOS/memory_kokkos.h            |  208 ++++
 src/KOKKOS/modify_kokkos.cpp          |  585 ++++++++++
 src/KOKKOS/modify_kokkos.h            |   73 ++
 src/KOKKOS/neigh_full_kokkos.h        |  507 +++++++++
 src/KOKKOS/neigh_list_kokkos.cpp      |  118 ++
 src/KOKKOS/neigh_list_kokkos.h        |  104 ++
 src/KOKKOS/neighbor_kokkos.cpp        |  269 +++++
 src/KOKKOS/neighbor_kokkos.h          |  257 +++++
 src/KOKKOS/pair_kokkos.h              |  655 +++++++++++
 src/KOKKOS/pair_lj_cut_kokkos.cpp     |  267 +++++
 src/KOKKOS/pair_lj_cut_kokkos.h       |  112 ++
 src/KOKKOS/pair_table_kokkos.cpp      | 1500 +++++++++++++++++++++++++
 src/KOKKOS/pair_table_kokkos.h        |  352 ++++++
 src/KOKKOS/verlet_kokkos.cpp          |  443 ++++++++
 src/KOKKOS/verlet_kokkos.h            |   48 +
 src/MAKE/Makefile.cuda                |  111 ++
 src/Makefile                          |    2 +-
 src/atom_vec.cpp                      |    4 +-
 src/compute_property_local.cpp        |    4 +-
 35 files changed, 9835 insertions(+), 4 deletions(-)
 create mode 100644 src/KOKKOS/Install.sh
 create mode 100644 src/KOKKOS/atom_kokkos.cpp
 create mode 100644 src/KOKKOS/atom_kokkos.h
 create mode 100644 src/KOKKOS/atom_vec_atomic_kokkos.cpp
 create mode 100644 src/KOKKOS/atom_vec_atomic_kokkos.h
 create mode 100644 src/KOKKOS/atom_vec_kokkos.cpp
 create mode 100644 src/KOKKOS/atom_vec_kokkos.h
 create mode 100644 src/KOKKOS/comm_kokkos.cpp
 create mode 100644 src/KOKKOS/comm_kokkos.h
 create mode 100644 src/KOKKOS/domain_kokkos.cpp
 create mode 100644 src/KOKKOS/domain_kokkos.h
 create mode 100644 src/KOKKOS/fix_nve_kokkos.cpp
 create mode 100644 src/KOKKOS/fix_nve_kokkos.h
 create mode 100644 src/KOKKOS/kokkos.cpp
 create mode 100644 src/KOKKOS/kokkos.h
 create mode 100644 src/KOKKOS/kokkos_type.h
 create mode 100644 src/KOKKOS/memory_kokkos.h
 create mode 100644 src/KOKKOS/modify_kokkos.cpp
 create mode 100644 src/KOKKOS/modify_kokkos.h
 create mode 100644 src/KOKKOS/neigh_full_kokkos.h
 create mode 100644 src/KOKKOS/neigh_list_kokkos.cpp
 create mode 100644 src/KOKKOS/neigh_list_kokkos.h
 create mode 100644 src/KOKKOS/neighbor_kokkos.cpp
 create mode 100644 src/KOKKOS/neighbor_kokkos.h
 create mode 100644 src/KOKKOS/pair_kokkos.h
 create mode 100644 src/KOKKOS/pair_lj_cut_kokkos.cpp
 create mode 100644 src/KOKKOS/pair_lj_cut_kokkos.h
 create mode 100644 src/KOKKOS/pair_table_kokkos.cpp
 create mode 100644 src/KOKKOS/pair_table_kokkos.h
 create mode 100644 src/KOKKOS/verlet_kokkos.cpp
 create mode 100644 src/KOKKOS/verlet_kokkos.h
 create mode 100755 src/MAKE/Makefile.cuda

diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
new file mode 100644
index 0000000000..9378eccfc7
--- /dev/null
+++ b/src/KOKKOS/Install.sh
@@ -0,0 +1,71 @@
+# Install/unInstall package files in LAMMPS
+# mode = 0/1/2 for uninstall/install/update
+
+mode=$1
+
+# arg1 = file, arg2 = file it depends on
+
+action () {
+  if (test $mode = 0) then
+    rm -f ../$1
+  elif (! cmp -s $1 ../$1) then
+    if (test -z "$2" || test -e ../$2) then
+      cp $1 ..
+      if (test $mode = 2) then
+        echo "  updating src/$1"
+      fi
+    fi
+  elif (test -n "$2") then
+    if (test ! -e ../$2) then
+      rm -f ../$1
+    fi
+  fi
+}
+
+# force rebuild of files with LMP_KOKKOS switch
+
+touch ../accelerator_kokkos.h
+touch ../memory.h
+
+# all package files with no dependencies
+
+for file in *.cpp *.h; do
+  action $file
+done
+
+# edit 2 Makefile.package files to include/exclude package info
+
+if (test $1 = 1) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
+    sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
+    sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
+    # multiline form needed for BSD sed on Macs
+    sed -i -e '4 i \
+include ..\/..\/lib\/kokkos\/Makefile.lammps
+' ../Makefile.package.settings
+
+  fi
+
+elif (test $1 = 0) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
+    sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
+  fi
+
+fi
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
new file mode 100644
index 0000000000..e36a5a926c
--- /dev/null
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -0,0 +1,190 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "atom_kokkos.h"
+#include "atom_vec.h"
+#include "atom_vec_kokkos.h"
+#include "comm_kokkos.h"
+#include "update.h"
+#include "domain.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
+{
+  // CommKokkos is allocated before this class, so hand it our address now
+  CommKokkos *kkcomm = (CommKokkos *) comm;
+  kkcomm->atomKK = this;
+}
+
+/* ---------------------------------------------------------------------- */
+
+AtomKokkos::~AtomKokkos()
+{
+  // replace every dual view held by this class with an empty one
+  k_tag = DAT::tdual_int_1d();
+  k_mask = DAT::tdual_int_1d();
+  k_type = DAT::tdual_int_1d();
+  k_image = DAT::tdual_tagint_1d();  // must match the declared type of k_image (tdual_tagint_1d)
+  k_molecule = DAT::tdual_int_1d();
+  k_x = DAT::tdual_x_array();
+  k_v = DAT::tdual_v_array();
+  k_f = DAT::tdual_f_array();
+  k_mass = DAT::tdual_float_1d();
+
+  // NOTE(review): presumably these alias view storage and are cleared so the Atom base destructor does not free them - confirm
+  tag = NULL;
+  mask = NULL;
+  type = NULL;
+  image = NULL;
+  molecule = NULL;
+  mass = NULL;
+
+  memory->sfree(x);  // x, v, f are released via sfree() here, then cleared
+  memory->sfree(v);
+  memory->sfree(f);
+  x = NULL;
+  v = NULL;
+  f = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
+{
+  ((AtomVecKokkos *) avec)->sync(space,mask);  // pure forwarder; the unchecked cast assumes avec is an AtomVecKokkos
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
+{
+  ((AtomVecKokkos *) avec)->modified(space,mask);  // pure forwarder; the unchecked cast assumes avec is an AtomVecKokkos
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomKokkos::allocate_type_arrays()
+{
+  if (!avec->mass_type) return;   // arrays are only created for atom styles that set mass_type
+  const int nslots = ntypes+1;    // indices 1..ntypes are used below; slot 0 is left unset
+  k_mass = DAT::tdual_float_1d("Mass",nslots);
+  mass = k_mass.h_view.ptr_on_device();   // raw pointer taken from the host view
+  mass_setflag = new int[nslots];
+  for (int t = 1; t < nslots; t++) mass_setflag[t] = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void AtomKokkos::sort()
+{
+  int i,m,n,ix,iy,iz,ibin,empty;
+  // bin local atoms by position, then reorder them in place into bin order
+  sync(Host,ALL_MASK);      // binning and avec->copy() below use host-side data
+  modified(Host,ALL_MASK);  // flagged up front, i.e. also when the early return below is taken
+
+  // set next timestep for sorting to take place
+
+  nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
+
+  // re-setup sort bins if needed
+
+  if (domain->box_change) setup_sort_bins();
+  if (nbins == 1) return;
+
+  // reallocate per-atom vectors if needed
+
+  if (nlocal > maxnext) {
+    memory->destroy(next);
+    memory->destroy(permute);
+    maxnext = atom->nmax;
+    memory->create(next,maxnext,"atom:next");
+    memory->create(permute,maxnext,"atom:permute");
+  }
+
+  // ensure there is one extra atom location at end of arrays for swaps
+
+  if (nlocal == nmax) avec->grow(0);
+
+  // bin atoms in reverse order so linked list will be in forward order
+
+  for (i = 0; i < nbins; i++) binhead[i] = -1;
+
+  HAT::t_x_array_const h_x = k_x.view<LMPHostType>();
+  for (i = nlocal-1; i >= 0; i--) {
+    ix = static_cast<int> ((h_x(i,0)-bboxlo[0])*bininvx);
+    iy = static_cast<int> ((h_x(i,1)-bboxlo[1])*bininvy);
+    iz = static_cast<int> ((h_x(i,2)-bboxlo[2])*bininvz);
+    ix = MAX(ix,0);
+    iy = MAX(iy,0);
+    iz = MAX(iz,0);
+    ix = MIN(ix,nbinx-1);
+    iy = MIN(iy,nbiny-1);
+    iz = MIN(iz,nbinz-1);
+    ibin = iz*nbiny*nbinx + iy*nbinx + ix;
+    next[i] = binhead[ibin];
+    binhead[ibin] = i;
+  }
+
+  // permute = desired permutation of atoms
+  // permute[I] = J means Ith new atom will be Jth old atom
+
+  n = 0;
+  for (m = 0; m < nbins; m++) {
+    i = binhead[m];
+    while (i >= 0) {
+      permute[n++] = i;
+      i = next[i];
+    }
+  }
+
+  // current = current permutation, just reuse next vector
+  // current[I] = J means Ith current atom is Jth old atom
+
+  int *current = next;
+  for (i = 0; i < nlocal; i++) current[i] = i;
+
+  // reorder local atom list, when done, current = permute
+  // perform "in place" using copy() to extra atom location at end of list
+  // inner while loop processes one cycle of the permutation
+  // copy before inner-loop moves an atom to end of atom list
+  // copy after inner-loop moves atom at end of list back into list
+  // empty = location in atom list that is currently empty
+
+  for (i = 0; i < nlocal; i++) {
+    if (current[i] == permute[i]) continue;
+    avec->copy(i,nlocal,0);
+    empty = i;
+    while (permute[empty] != i) {
+      avec->copy(permute[empty],empty,0);
+      empty = current[empty] = permute[empty];
+    }
+    avec->copy(nlocal,empty,0);
+    current[empty] = permute[empty];
+  }
+
+  // sanity check that current = permute
+
+  //int flag = 0;
+  //for (i = 0; i < nlocal; i++)
+  //  if (current[i] != permute[i]) flag = 1;
+  //int flagall;
+  //MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
+  //if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
+}
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
new file mode 100644
index 0000000000..594bf80e5f
--- /dev/null
+++ b/src/KOKKOS/atom_kokkos.h
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "atom.h"
+#include "kokkos_type.h"
+
+#ifndef LMP_ATOM_KOKKOS_H
+#define LMP_ATOM_KOKKOS_H
+
+namespace LAMMPS_NS {
+
+class AtomKokkos : public Atom {
+ public:
+  DAT::tdual_int_1d k_tag, k_type, k_mask, k_molecule;  // dual (h_view/d_view) storage for integer per-atom fields
+  DAT::tdual_tagint_1d k_image;
+  DAT::tdual_x_array k_x;  // k_x, k_v, k_f: grown with 3 columns per atom in AtomVecAtomicKokkos::grow()
+  DAT::tdual_v_array k_v;
+  DAT::tdual_f_array k_f;
+
+  DAT::tdual_float_1d k_mass;  // sized ntypes+1 in allocate_type_arrays()
+
+  AtomKokkos(class LAMMPS *);
+  ~AtomKokkos();
+
+  virtual void allocate_type_arrays();
+  void sync(const ExecutionSpace space, unsigned int mask);      // forwarded to the atom style (AtomVecKokkos)
+  void modified(const ExecutionSpace space, unsigned int mask);  // forwarded to the atom style (AtomVecKokkos)
+  virtual void sort();
+};
+
+template<class ViewType, class IndexView>
+class SortFunctor {  // NOTE(review): no access specifier follows, so all members (constructors and operator() included) are private
+  typedef typename ViewType::device_type device_type;
+  ViewType source;  // view that is read from
+  Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;  // allocated in the constructors with the extents of src
+  IndexView index;  // gather map: dest(i,...) = source(index(i),...)
+  SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){  // NOTE(review): constructors select on dynamic_rank, operator() on rank - confirm intended
+    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0());
+  }
+  SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
+    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1());
+  }
+  SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
+    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2());
+  }
+  SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
+    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2(),src.dimension_3());
+  }
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==1, int>::type& i) {  // NOTE(review): enable_if sits in a non-template member signature; verify this instantiates for any single rank
+    dest(i) = source(index(i));
+  }
+  void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==2, int>::type& i) {  // NOTE(review): unlike the rank-1 overload, not marked KOKKOS_INLINE_FUNCTION (same for rank 3 and 4)
+    for(int j=0;j<source.dimension_1();j++)
+      dest(i,j) = source(index(i),j);
+  }
+  void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==3, int>::type& i) {
+    for(int j=0;j<source.dimension_1();j++)
+    for(int k=0;k<source.dimension_2();k++)
+      dest(i,j,k) = source(index(i),j,k);
+  }
+  void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==4, int>::type& i) {
+    for(int j=0;j<source.dimension_1();j++)
+    for(int k=0;k<source.dimension_2();k++)
+    for(int l=0;l<source.dimension_3();l++)
+      dest(i,j,k,l) = source(index(i),j,k,l);
+  }
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
new file mode 100644
index 0000000000..1db293cd44
--- /dev/null
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -0,0 +1,1371 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdlib.h"
+#include "atom_vec_atomic_kokkos.h"
+#include "atom_kokkos.h"
+#include "comm_kokkos.h"
+#include "domain.h"
+#include "modify.h"
+#include "fix.h"
+#include "atom_masks.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define DELTA 10000
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
+{
+  // Kokkos-typed handles to the global Atom and Comm objects, plus a 1-element counter view
+  atomKK = (AtomKokkos *) atom;
+  commKK = (CommKokkos *) comm;
+  k_count = DAT::tdual_int_1d("atom::k_count",1);
+  mass_type = 1;
+  molecular = 0;
+  comm_f_only = 1;
+  comm_x_only = 1;
+  size_forward = 3;
+  size_reverse = 3;
+  size_border = 6;
+  size_velocity = 3;
+  size_data_atom = 5;
+  size_data_vel = 4;
+  xcol_data = 3;
+}
+
+/* ----------------------------------------------------------------------
+   grow atom arrays
+   n = 0 grows arrays by DELTA
+   n > 0 allocates arrays to size n
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::grow(int n)
+{
+  if (n == 0) nmax += DELTA;
+  else nmax = n;
+  atomKK->nmax = nmax;  // keep the Atom-level size in step with the local one
+  if (nmax < 0 || nmax > MAXSMALLINT)
+    error->one(FLERR,"Per-processor system is too big");
+
+  sync(Device,ALL_MASK);      // NOTE(review): presumably makes the device copy current before resizing - confirm
+  modified(Device,ALL_MASK);
+
+  memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
+  memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
+  memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask");
+  memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image");
+
+  memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x");
+  memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v");
+  memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
+
+  grow_reset();         // re-read pointers and views, which the resize may have replaced
+  sync(Host,ALL_MASK);
+
+  if (atom->nextra_grow)  // let fixes listed in atom->extra_grow resize their own per-atom arrays
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
+}
+
+/* ----------------------------------------------------------------------
+   reset local array ptrs
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::grow_reset()
+{
+  tag = atomKK->tag;      // raw pointers as currently held by AtomKokkos
+  type = atomKK->type;
+  mask = atomKK->mask;
+  image = atomKK->image;
+  x = atomKK->x;
+  v = atomKK->v;
+  f = atomKK->f;
+  // device-side views of the same fields
+  d_tag = atomKK->k_tag.d_view;
+  d_type = atomKK->k_type.d_view;
+  d_mask = atomKK->k_mask.d_view;
+  d_image = atomKK->k_image.d_view;
+  d_x = atomKK->k_x.d_view;
+  d_v = atomKK->k_v.d_view;
+  d_f = atomKK->k_f.d_view;
+  // host-side views of the same fields
+  h_tag = atomKK->k_tag.h_view;
+  h_type = atomKK->k_type.h_view;
+  h_mask = atomKK->k_mask.h_view;
+  h_image = atomKK->k_image.h_view;
+  h_x = atomKK->k_x.h_view;
+  h_v = atomKK->k_v.h_view;
+  h_f = atomKK->k_f.h_view;
+}
+
+/* ----------------------------------------------------------------------
+   copy atom I info to atom J
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::copy(int i, int j, int delflag)
+{
+  // integer per-atom fields (host side); forces are not copied
+  h_tag[j] = h_tag[i];
+  h_type[j] = h_type[i];
+  mask[j] = mask[i];
+  h_image[j] = h_image[i];
+  // position and velocity, one component at a time
+  for (int dim = 0; dim < 3; dim++) {
+    h_x(j,dim) = h_x(i,dim);
+    h_v(j,dim) = h_v(i,dim);
+  }
+
+  // let every fix listed in atom->extra_grow copy its own per-atom data
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecAtomicKokkos_PackComm {  // functor: entry i packs the coordinates of atom _list(_iswap,i) into row i of _buf
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // copy of the caller's pbc[0..5], stored as X_FLOAT
+
+  AtomVecAtomicKokkos_PackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        const size_t maxsend = (buf.view<DeviceType>().dimension_0()*buf.view<DeviceType>().dimension_1())/3;  // total buffer entries divided by 3
+        const size_t elements = 3;
+        buffer_view<DeviceType>(_buf,buf,maxsend,elements);  // NOTE(review): presumably sets _buf to a maxsend x 3 view over buf - confirm in buffer_view
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // template constant: no shift applied
+          _buf(i,0) = _x(j,0);
+          _buf(i,1) = _x(j,1);
+          _buf(i,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift by pbc times the box lengths only
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // additionally add the xy, xz, yz terms
+          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
+                                          const DAT::tdual_int_2d &list,
+                                          const int & iswap,
+                                          const DAT::tdual_xfloat_2d &buf,
+                                          const int &pbc_flag,
+                                          const int* const pbc)
+{
+  // host vs. device execution is decided at run time by commKK->forward_comm_on_host;
+  // pbc_flag and domain->triclinic select one of four precompiled kernels in each case
+
+  if (commKK->forward_comm_on_host) {
+    sync(Host,X_MASK);
+    if (pbc_flag) {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackComm<LMPHostType,1,1> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      } else {
+        AtomVecAtomicKokkos_PackComm<LMPHostType,1,0> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      }
+    } else {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackComm<LMPHostType,0,1> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      } else {
+        AtomVecAtomicKokkos_PackComm<LMPHostType,0,0> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      }
+    }
+    LMPHostType::fence();
+  } else {
+    sync(Device,X_MASK);
+    if (pbc_flag) {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,1> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      } else {
+        AtomVecAtomicKokkos_PackComm<LMPDeviceType,1,0> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      }
+    } else {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,1> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      } else {
+        AtomVecAtomicKokkos_PackComm<LMPDeviceType,0,0> packer(atomKK->k_x,buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,packer);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+
+  return n*size_forward;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC>
+struct AtomVecAtomicKokkos_PackCommSelf {  // functor: entry i writes the coordinates of atom _list(_iswap,i) to atom i+_nfirst of the same x array
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;  // read handle on x
+  typename ArrayTypes<DeviceType>::t_x_array _xw;  // write handle, built from the same x.view<DeviceType>() as _x
+  int _nfirst;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];  // copy of the caller's pbc[0..5], stored as X_FLOAT
+
+  AtomVecAtomicKokkos_PackCommSelf(
+      const typename DAT::tdual_x_array &x,
+      const int &nfirst,
+      const typename DAT::tdual_int_2d &list,
+      const int & iswap,
+      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _xprd(xprd),_yprd(yprd),_zprd(zprd),
+      _xy(xy),_xz(xz),_yz(yz) {
+        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
+        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
+  };
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+        const int j = _list(_iswap,i);
+      if (PBC_FLAG == 0) {  // template constant: no shift applied
+          _xw(i+_nfirst,0) = _x(j,0);
+          _xw(i+_nfirst,1) = _x(j,1);
+          _xw(i+_nfirst,2) = _x(j,2);
+      } else {
+        if (TRICLINIC == 0) {  // shift by pbc times the box lengths only
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        } else {  // additionally add the xy, xz, yz terms
+          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
+        }
+      }
+
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+                                        const int nfirst, const int &pbc_flag, const int* const pbc) {
+  if (commKK->forward_comm_on_host) {   // x is both read and written, so it is synced and flagged modified
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    if (pbc_flag) {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,1> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      } else {
+        AtomVecAtomicKokkos_PackCommSelf<LMPHostType,1,0> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      }
+    } else {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,1> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      } else {
+        AtomVecAtomicKokkos_PackCommSelf<LMPHostType,0,0> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      }
+    }
+    LMPHostType::fence();
+  } else {                              // same four-way kernel choice, executed on the device
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    if (pbc_flag) {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,1> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      } else {
+        AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,1,0> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      }
+    } else {
+      if (domain->triclinic) {
+        AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,1> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      } else {
+        AtomVecAtomicKokkos_PackCommSelf<LMPDeviceType,0,0> selfcopy(atomKK->k_x,nfirst,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc);
+        Kokkos::parallel_for(n,selfcopy);
+      }
+    }
+    LMPDeviceType::fence();
+  }
+  return n*3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecAtomicKokkos_UnpackComm {  // functor: entry i copies row i of _buf into the coordinates of atom i+_first
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;  // destination coordinates
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;  // source buffer, read-only
+  int _first;  // index of the first atom written
+
+  AtomVecAtomicKokkos_UnpackComm(
+      const typename DAT::tdual_x_array &x,
+      const typename DAT::tdual_xfloat_2d &buf,
+      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
+                        _first(first) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+      _x(i+_first,0) = _buf(i,0);
+      _x(i+_first,1) = _buf(i,1);
+      _x(i+_first,2) = _buf(i,2);
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {   // unpack n (x,y,z) rows of buf into atoms first..first+n-1 on the host
+    sync(Host,X_MASK);
+    modified(Host,X_MASK);
+    struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPHostType::fence();              // fence the space the kernel ran in, as the host paths of the pack routines do
+  } else {                             // same unpack, executed on the device
+    sync(Device,X_MASK);
+    modified(Device,X_MASK);
+    struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
+    Kokkos::parallel_for(n,f);
+    LMPDeviceType::fence();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_comm(int n, int *list, double *buf,
+                             int pbc_flag, int *pbc)
+{
+  double *out = buf;
+  // no periodic shift requested: copy the listed host coordinates verbatim
+  if (!pbc_flag) {
+    for (int k = 0; k < n; k++) {
+      const int iatom = list[k];
+      *out++ = h_x(iatom,0);
+      *out++ = h_x(iatom,1);
+      *out++ = h_x(iatom,2);
+    }
+    return static_cast<int> (out - buf);
+  }
+  // shift by pbc times the box lengths; triclinic boxes add the xy, xz, yz terms
+  double xshift,yshift,zshift;
+  if (domain->triclinic) {
+    xshift = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+    yshift = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+    zshift = pbc[2]*domain->zprd;
+  } else {
+    xshift = pbc[0]*domain->xprd;
+    yshift = pbc[1]*domain->yprd;
+    zshift = pbc[2]*domain->zprd;
+  }
+  for (int k = 0; k < n; k++) {
+    const int iatom = list[k];
+    *out++ = h_x(iatom,0) + xshift;
+    *out++ = h_x(iatom,1) + yshift;
+    *out++ = h_x(iatom,2) + zshift;
+  }
+  return static_cast<int> (out - buf);   // number of doubles written (3 per atom)
+}
+
+/* ---------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_comm_vel(int n, int *list, double *buf,
+                                 int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+  // pack x and v (6 doubles per atom) of the n atoms in list into buf; returns the count written
+  m = 0;
+  if (pbc_flag == 0) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
+      dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
+      dz = pbc[2]*domain->zprd;
+    }
+    if (!deform_vremap) {   // positions are shifted, velocities sent unchanged
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {                // velocities of atoms in the deform group also get an h_rate-based shift
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        if (mask[j] & deform_groupbit) {   // group test on the atom being packed (j), not the list position i
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack forward-comm coordinates from buf into host x
+   for the n atoms starting at index first
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_comm(int n, int first, double *buf)
+{
+  const double *in = buf;
+
+  for (int k = 0; k < n; ++k) {
+    const int dst = first + k;
+    h_x(dst,0) = *in++;
+    h_x(dst,1) = *in++;
+    h_x(dst,2) = *in++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   unpack forward-comm coordinates and velocities from buf
+   into host x and v for the n atoms starting at index first
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
+{
+  const double *in = buf;
+
+  for (int k = 0; k < n; ++k) {
+    const int dst = first + k;
+    h_x(dst,0) = *in++;
+    h_x(dst,1) = *in++;
+    h_x(dst,2) = *in++;
+    h_v(dst,0) = *in++;
+    h_v(dst,1) = *in++;
+    h_v(dst,2) = *in++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack forces of the n atoms starting at index first for reverse comm
+   returns number of doubles written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
+{
+  double *out = buf;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; ++iatom) {
+    const double *fi = f[iatom];
+    *out++ = fi[0];
+    *out++ = fi[1];
+    *out++ = fi[2];
+  }
+  return static_cast<int> (out - buf);
+}
+
+/* ----------------------------------------------------------------------
+   unpack reverse-comm forces from buf
+   and accumulate them onto the n atoms in list
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
+{
+  const double *in = buf;
+
+  for (int k = 0; k < n; ++k) {
+    double *fj = f[list[k]];
+    fj[0] += *in++;
+    fj[1] += *in++;
+    fj[2] += *in++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   functor: pack x, tag, type, mask of one border atom per work item
+   row i of buf receives the atom stored at list(iswap,i)
+   PBC_FLAG != 0 adds the shift (dx,dy,dz) to the coordinates
+------------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG>
+struct AtomVecAtomicKokkos_PackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  typename AT::t_xfloat_2d _buf;
+  const typename AT::t_int_2d_const _list;
+  const typename AT::t_x_array_randomread _x;
+  const typename AT::t_tagint_1d _tag;
+  const typename AT::t_int_1d _type;
+  const typename AT::t_int_1d _mask;
+  const int _iswap;
+  X_FLOAT _dx,_dy,_dz;
+
+  AtomVecAtomicKokkos_PackBorder(
+      const typename AT::t_xfloat_2d &buf,
+      const typename AT::t_int_2d_const &list,
+      const int & iswap,
+      const typename AT::t_x_array &x,
+      const typename AT::t_tagint_1d &tag,
+      const typename AT::t_int_1d &type,
+      const typename AT::t_int_1d &mask,
+      const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
+      _buf(buf),_list(list),
+      _x(x),_tag(tag),_type(type),_mask(mask),
+      _iswap(iswap),
+      _dx(dx),_dy(dy),_dz(dz) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+
+    // coordinates: shifted only in the periodic instantiation
+    if (PBC_FLAG != 0) {
+      _buf(i,0) = _x(j,0) + _dx;
+      _buf(i,1) = _x(j,1) + _dy;
+      _buf(i,2) = _x(j,2) + _dz;
+    } else {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    }
+
+    // integer properties are stored in the buffer's floating-point slots
+    _buf(i,3) = _tag(j);
+    _buf(i,4) = _type(j);
+    _buf(i,5) = _mask(j);
+  }
+};
+
+/* ----------------------------------------------------------------------
+   pack border atoms (x, tag, type, mask) of swap iswap into a Kokkos buffer
+   runs the PackBorder functor over n atoms on the requested space
+   returns number of values packed (6 per atom)
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+                               int pbc_flag, int *pbc, ExecutionSpace space)
+{
+  X_FLOAT dx,dy,dz;
+
+  // coordinate shift: zero without PBC, box lengths for orthogonal boxes,
+  // bare image counts for triclinic boxes
+  if (pbc_flag == 0) {
+    dx = dy = dz = 0;
+  } else if (domain->triclinic == 0) {
+    dx = pbc[0]*domain->xprd;
+    dy = pbc[1]*domain->yprd;
+    dz = pbc[2]*domain->zprd;
+  } else {
+    dx = pbc[0];
+    dy = pbc[1];
+    dz = pbc[2];
+  }
+
+  if (space == Host) {
+    if (pbc_flag != 0) {
+      AtomVecAtomicKokkos_PackBorder<LMPHostType,1> packer(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    } else {
+      AtomVecAtomicKokkos_PackBorder<LMPHostType,0> packer(
+        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
+        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    }
+    LMPHostType::fence();
+  } else {
+    if (pbc_flag != 0) {
+      AtomVecAtomicKokkos_PackBorder<LMPDeviceType,1> packer(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    } else {
+      AtomVecAtomicKokkos_PackBorder<LMPDeviceType,0> packer(
+        buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
+        iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
+      Kokkos::parallel_for(n,packer);
+    }
+    LMPDeviceType::fence();
+  }
+
+  return n*6;
+}
+
+/* ----------------------------------------------------------------------
+   pack x, tag, type, mask of the n atoms in list for border communication
+   when pbc_flag is set, x is shifted by the periodic image offset in pbc
+   returns number of doubles written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
+                               int pbc_flag, int *pbc)
+{
+  double *out = buf;
+
+  if (pbc_flag != 0) {
+    // orthogonal boxes shift by box lengths, triclinic by bare image counts
+    double shiftx,shifty,shiftz;
+    if (domain->triclinic != 0) {
+      shiftx = pbc[0];
+      shifty = pbc[1];
+      shiftz = pbc[2];
+    } else {
+      shiftx = pbc[0]*domain->xprd;
+      shifty = pbc[1]*domain->yprd;
+      shiftz = pbc[2]*domain->zprd;
+    }
+    for (int k = 0; k < n; ++k) {
+      const int src = list[k];
+      *out++ = h_x(src,0) + shiftx;
+      *out++ = h_x(src,1) + shifty;
+      *out++ = h_x(src,2) + shiftz;
+      *out++ = h_tag[src];
+      *out++ = h_type[src];
+      *out++ = h_mask[src];
+    }
+  } else {
+    for (int k = 0; k < n; ++k) {
+      const int src = list[k];
+      *out++ = h_x(src,0);
+      *out++ = h_x(src,1);
+      *out++ = h_x(src,2);
+      *out++ = h_tag[src];
+      *out++ = h_type[src];
+      *out++ = h_mask[src];
+    }
+  }
+
+  return static_cast<int> (out - buf);
+}
+
+/* ----------------------------------------------------------------------
+   pack x, tag, type, mask, v of the n atoms in list for border comm
+   when pbc_flag is set, x is shifted by the periodic image offset in pbc
+   when deform_vremap is also set, v of atoms in the deform group
+     is shifted by the box deformation rate
+   returns number of doubles written to buf
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
+                                   int pbc_flag, int *pbc)
+{
+  int i,j,m;
+  double dx,dy,dz,dvx,dvy,dvz;
+
+  m = 0;
+  if (pbc_flag == 0) {
+    // no periodic shift: copy all fields verbatim
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = h_x(j,0);
+      buf[m++] = h_x(j,1);
+      buf[m++] = h_x(j,2);
+      buf[m++] = h_tag[j];
+      buf[m++] = h_type[j];
+      buf[m++] = h_mask[j];
+      buf[m++] = h_v(j,0);
+      buf[m++] = h_v(j,1);
+      buf[m++] = h_v(j,2);
+    }
+  } else {
+    // orthogonal boxes shift by box lengths, triclinic by bare image counts
+    if (domain->triclinic == 0) {
+      dx = pbc[0]*domain->xprd;
+      dy = pbc[1]*domain->yprd;
+      dz = pbc[2]*domain->zprd;
+    } else {
+      dx = pbc[0];
+      dy = pbc[1];
+      dz = pbc[2];
+    }
+    if (!deform_vremap) {
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_tag[j];
+        buf[m++] = h_type[j];
+        buf[m++] = h_mask[j];
+        buf[m++] = h_v(j,0);
+        buf[m++] = h_v(j,1);
+        buf[m++] = h_v(j,2);
+      }
+    } else {
+      // velocity shift applied to atoms in the deform group
+      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
+      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
+      dvz = pbc[2]*h_rate[2];
+      for (i = 0; i < n; i++) {
+        j = list[i];
+        buf[m++] = h_x(j,0) + dx;
+        buf[m++] = h_x(j,1) + dy;
+        buf[m++] = h_x(j,2) + dz;
+        buf[m++] = h_tag[j];
+        buf[m++] = h_type[j];
+        buf[m++] = h_mask[j];
+        // group membership must be tested for the atom being packed (j),
+        // not for the send-list slot index (i)
+        if (mask[j] & deform_groupbit) {
+          buf[m++] = h_v(j,0) + dvx;
+          buf[m++] = h_v(j,1) + dvy;
+          buf[m++] = h_v(j,2) + dvz;
+        } else {
+          buf[m++] = h_v(j,0);
+          buf[m++] = h_v(j,1);
+          buf[m++] = h_v(j,2);
+        }
+      }
+    }
+  }
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   functor: unpack x, tag, type, mask of one border atom per work item
+   row i of buf is stored into atom slot first+i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecAtomicKokkos_UnpackBorder {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  const typename AT::t_xfloat_2d_const _buf;
+  typename AT::t_x_array _x;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  int _first;
+
+  AtomVecAtomicKokkos_UnpackBorder(
+      const typename AT::t_xfloat_2d_const &buf,
+      typename AT::t_x_array &x,
+      typename AT::t_tagint_1d &tag,
+      typename AT::t_int_1d &type,
+      typename AT::t_int_1d &mask,
+      const int& first):
+      _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int dst = i+_first;
+
+    _x(dst,0) = _buf(i,0);
+    _x(dst,1) = _buf(i,1);
+    _x(dst,2) = _buf(i,2);
+
+    // integer properties travel in floating-point slots; truncate back to int
+    _tag(dst) = static_cast<int> (_buf(i,3));
+    _type(dst) = static_cast<int> (_buf(i,4));
+    _mask(dst) = static_cast<int> (_buf(i,5));
+  }
+};
+
+/* ----------------------------------------------------------------------
+   unpack n border atoms from a Kokkos buffer into slots first..first+n-1
+   grows per-atom storage until the new atoms fit
+   runs the UnpackBorder functor on the requested space
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
+                     const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
+  const unsigned int fields = X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK;
+
+  // fields are flagged as modified both before and after any growth
+  modified(space,fields);
+  while (first+n >= nmax) grow(0);
+  modified(space,fields);
+
+  if (space != Host) {
+    AtomVecAtomicKokkos_UnpackBorder<LMPDeviceType> unpacker(
+      buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,first);
+    Kokkos::parallel_for(n,unpacker);
+    LMPDeviceType::fence();
+    return;
+  }
+
+  AtomVecAtomicKokkos_UnpackBorder<LMPHostType> unpacker(
+    buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
+  Kokkos::parallel_for(n,unpacker);
+  LMPHostType::fence();
+}
+
+/* ----------------------------------------------------------------------
+   unpack x, tag, type, mask of n border atoms from buf
+   into host storage starting at index first
+   storage is grown whenever the next slot equals nmax
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
+{
+  const double *in = buf;
+  const int last = first + n;
+
+  for (int iatom = first; iatom < last; ++iatom) {
+    if (iatom == nmax) grow(0);
+    // flag the host copies as modified for every atom written
+    modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
+
+    h_x(iatom,0) = *in++;
+    h_x(iatom,1) = *in++;
+    h_x(iatom,2) = *in++;
+    h_tag[iatom] = static_cast<int> (*in++);
+    h_type[iatom] = static_cast<int> (*in++);
+    h_mask[iatom] = static_cast<int> (*in++);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   unpack x, tag, type, mask, v of n border atoms from buf
+   into host storage starting at index first
+   storage is grown whenever the next slot equals nmax
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (i == nmax) grow(0);
+    // flag every host field written below as modified, as unpack_border()
+    // does, so the host-side writes are not lost on a later device sync
+    modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
+    h_x(i,0) = buf[m++];
+    h_x(i,1) = buf[m++];
+    h_x(i,2) = buf[m++];
+    h_tag[i] = static_cast<int> (buf[m++]);
+    h_type[i] = static_cast<int> (buf[m++]);
+    h_mask[i] = static_cast<int> (buf[m++]);
+    h_v(i,0) = buf[m++];
+    h_v(i,1) = buf[m++];
+    h_v(i,2) = buf[m++];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   functor: pack one departing atom per work item into an 11-value record
+     (record length, x, v, tag, type, mask, image)
+   work item mysend packs atom sendlist(mysend); if copylist(mysend) > -1
+     that atom is then copied into the slot just vacated
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecAtomicKokkos_PackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  X_FLOAT _lo,_hi;
+
+  // read-only views used as the copy source
+  typename AT::t_x_array_randomread _x;
+  typename AT::t_v_array_randomread _v;
+  typename AT::t_tagint_1d_randomread _tag;
+  typename AT::t_int_1d_randomread _type;
+  typename AT::t_int_1d_randomread _mask;
+  typename AT::t_int_1d_randomread _image;
+
+  // writable views of the same arrays used as the copy destination
+  typename AT::t_x_array _xw;
+  typename AT::t_v_array _vw;
+  typename AT::t_tagint_1d _tagw;
+  typename AT::t_int_1d _typew;
+  typename AT::t_int_1d _maskw;
+  typename AT::t_int_1d _imagew;
+
+  typename AT::t_xfloat_2d_um _buf;
+  int _nlocal,_dim;
+  typename AT::t_int_1d_const _sendlist;
+  typename AT::t_int_1d_const _copylist;
+
+  AtomVecAtomicKokkos_PackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d copylist,int nlocal, int dim,
+                X_FLOAT lo, X_FLOAT hi):
+                _lo(lo),_hi(hi),
+                _x(atom->k_x.view<DeviceType>()),
+                _v(atom->k_v.view<DeviceType>()),
+                _tag(atom->k_tag.view<DeviceType>()),
+                _type(atom->k_type.view<DeviceType>()),
+                _mask(atom->k_mask.view<DeviceType>()),
+                _image(atom->k_image.view<DeviceType>()),
+                _xw(atom->k_x.view<DeviceType>()),
+                _vw(atom->k_v.view<DeviceType>()),
+                _tagw(atom->k_tag.view<DeviceType>()),
+                _typew(atom->k_type.view<DeviceType>()),
+                _maskw(atom->k_mask.view<DeviceType>()),
+                _imagew(atom->k_image.view<DeviceType>()),
+                _nlocal(nlocal),_dim(dim),
+                _sendlist(sendlist.template view<DeviceType>()),
+                _copylist(copylist.template view<DeviceType>()) {
+    // view the send buffer as rows of 11 values, one row per atom
+    const size_t elements = 11;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+                             buf.template view<DeviceType>().dimension_1())/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &mysend) const {
+    const int i = _sendlist(mysend);
+
+    // slot 0 holds the record length
+    _buf(mysend,0) = 11;
+    _buf(mysend,1) = _x(i,0);
+    _buf(mysend,2) = _x(i,1);
+    _buf(mysend,3) = _x(i,2);
+    _buf(mysend,4) = _v(i,0);
+    _buf(mysend,5) = _v(i,1);
+    _buf(mysend,6) = _v(i,2);
+    _buf(mysend,7) = _tag[i];
+    _buf(mysend,8) = _type[i];
+    _buf(mysend,9) = _mask[i];
+    _buf(mysend,10) = _image[i];
+
+    // a negative copylist entry means nothing is moved into slot i
+    const int j = _copylist(mysend);
+    if (j <= -1) return;
+
+    _xw(i,0) = _x(j,0);
+    _xw(i,1) = _x(j,1);
+    _xw(i,2) = _x(j,2);
+    _vw(i,0) = _v(j,0);
+    _vw(i,1) = _v(j,1);
+    _vw(i,2) = _v(j,2);
+    _tagw[i] = _tag(j);
+    _typew[i] = _type(j);
+    _maskw[i] = _mask(j);
+    _imagew[i] = _image(j);
+  }
+};
+
+/* ----------------------------------------------------------------------
+   pack nsend departing atoms into k_buf with the PackExchange functor
+   k_buf is enlarged first if it cannot hold nsend records of 11 values
+   returns number of values packed (11 per atom)
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
+{
+  // grow the buffer row count, keeping its current row width
+  if (nsend > (k_buf.view<LMPHostType>().dimension_0()*
+               k_buf.view<LMPHostType>().dimension_1())/11) {
+    int newsize = nsend*11/k_buf.view<LMPHostType>().dimension_1()+1;
+    k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
+  }
+
+  if (space != Host) {
+    AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+    LMPDeviceType::fence();
+  } else {
+    AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType>
+      packer(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
+    Kokkos::parallel_for(nsend,packer);
+    LMPHostType::fence();
+  }
+
+  return nsend*11;
+}
+
+/* ----------------------------------------------------------------------
+   pack all data of host atom i into buf for exchange with another proc
+   layout: buf[0] = record length, then x, v, tag, type, mask, image,
+     followed by any data packed by fixes registered in atom->extra_grow
+   returns number of doubles written, including the length slot
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_exchange(int i, double *buf)
+{
+  int m = 1;
+  buf[m++] = h_x(i,0);
+  buf[m++] = h_x(i,1);
+  buf[m++] = h_x(i,2);
+  buf[m++] = h_v(i,0);
+  buf[m++] = h_v(i,1);
+  buf[m++] = h_v(i,2);
+  buf[m++] = h_tag[i];
+  buf[m++] = h_type[i];
+  buf[m++] = h_mask[i];
+  // store image as a numeric value, like tag/type/mask above: that is how
+  // unpack_exchange() and the Kokkos exchange functors interpret this slot
+  buf[m++] = h_image[i];
+
+  if (atom->nextra_grow)
+    for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+      m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]);
+
+  // record length goes in the leading slot
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   functor: unpack one received 11-value exchange record per work item
+   a record is kept only if its coordinate in dimension dim lies in [lo,hi)
+   kept atoms are appended at the slot returned by an atomic increment
+     of the counter nlocal(0)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  X_FLOAT _lo,_hi;
+  typename AT::t_x_array _x;
+  typename AT::t_v_array _v;
+  typename AT::t_tagint_1d _tag;
+  typename AT::t_int_1d _type;
+  typename AT::t_int_1d _mask;
+  typename AT::t_int_1d _image;
+
+  typename AT::t_xfloat_2d_um _buf;
+  int _dim;
+  typename AT::t_int_1d _nlocal;
+
+  AtomVecAtomicKokkos_UnpackExchangeFunctor(
+      const AtomKokkos* atom,
+      const typename AT::tdual_xfloat_2d buf,
+      typename AT::tdual_int_1d nlocal,
+      int dim, X_FLOAT lo, X_FLOAT hi):
+                _lo(lo),_hi(hi),
+                _x(atom->k_x.view<DeviceType>()),
+                _v(atom->k_v.view<DeviceType>()),
+                _tag(atom->k_tag.view<DeviceType>()),
+                _type(atom->k_type.view<DeviceType>()),
+                _mask(atom->k_mask.view<DeviceType>()),
+                _image(atom->k_image.view<DeviceType>()),
+                _dim(dim),
+                _nlocal(nlocal.template view<DeviceType>()) {
+    // view the receive buffer as rows of 11 values, one row per atom
+    const size_t elements = 11;
+    const int maxsendlist = (buf.template view<DeviceType>().dimension_0()*
+                             buf.template view<DeviceType>().dimension_1())/elements;
+
+    buffer_view<DeviceType>(_buf,buf,maxsendlist,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &myrecv) const {
+    // slot 0 is the record length, so coordinate d sits in slot d+1
+    const X_FLOAT coord = _buf(myrecv,_dim+1);
+    if (!(coord >= _lo && coord < _hi)) return;
+
+    // claim the next free atom slot
+    const int slot = Kokkos::atomic_fetch_add(&_nlocal(0),1);
+
+    _x(slot,0) = _buf(myrecv,1);
+    _x(slot,1) = _buf(myrecv,2);
+    _x(slot,2) = _buf(myrecv,3);
+    _v(slot,0) = _buf(myrecv,4);
+    _v(slot,1) = _buf(myrecv,5);
+    _v(slot,2) = _buf(myrecv,6);
+    _tag[slot] = _buf(myrecv,7);
+    _type[slot] = _buf(myrecv,8);
+    _mask[slot] = _buf(myrecv,9);
+    _image[slot] = _buf(myrecv,10);
+  }
+};
+
+/* ----------------------------------------------------------------------
+   unpack received exchange records from k_buf with the UnpackExchange functor
+   nrecv is the number of values received (11 per record)
+   k_count carries the running atom count, seeded with nlocal
+   returns the atom count after unpacking
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
+  // seed the counter on the host
+  k_count.h_view(0) = nlocal;
+
+  if (space == Host) {
+    AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType>
+      unpacker(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/11,unpacker);
+    LMPHostType::fence();
+  } else {
+    // push the seeded counter to the device ...
+    k_count.modify<LMPHostType>();
+    k_count.sync<LMPDeviceType>();
+
+    AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType>
+      unpacker(atomKK,k_buf,k_count,dim,lo,hi);
+    Kokkos::parallel_for(nrecv/11,unpacker);
+    LMPDeviceType::fence();
+
+    // ... and pull the updated counter back to the host
+    k_count.modify<LMPDeviceType>();
+    k_count.sync<LMPHostType>();
+  }
+
+  return k_count.h_view(0);
+}
+
+/* ----------------------------------------------------------------------
+   unpack one exchanged atom from buf and append it as a new local atom
+   layout: buf[0] = record length, then x, v, tag, type, mask, image,
+     followed by data for fixes registered in atom->extra_grow
+   returns number of doubles consumed, including the length slot
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::unpack_exchange(double *buf)
+{
+  const int ilocal = atom->nlocal;
+  if (ilocal == nmax) grow(0);
+  modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
+           MASK_MASK | IMAGE_MASK);
+
+  // fixed part of the record occupies slots 1..10
+  h_x(ilocal,0) = buf[1];
+  h_x(ilocal,1) = buf[2];
+  h_x(ilocal,2) = buf[3];
+  h_v(ilocal,0) = buf[4];
+  h_v(ilocal,1) = buf[5];
+  h_v(ilocal,2) = buf[6];
+  h_tag[ilocal] = static_cast<int> (buf[7]);
+  h_type[ilocal] = static_cast<int> (buf[8]);
+  h_mask[ilocal] = static_cast<int> (buf[9]);
+  h_image[ilocal] = static_cast<int> (buf[10]);
+
+  // per-fix data follows the fixed part
+  int m = 11;
+  for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
+    m += modify->fix[atom->extra_grow[iextra]]->
+      unpack_exchange(ilocal,&buf[m]);
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   number of restart values for all atoms owned by this proc
+   11 values per atom plus whatever each restart-storing fix reports
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::size_restart()
+{
+  const int nlocal = atom->nlocal;
+  int total = 11 * nlocal;
+
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++) {
+    for (int i = 0; i < nlocal; i++)
+      total += modify->fix[atom->extra_restart[iextra]]->size_restart(i);
+  }
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom I's data for restart file including extra quantities
+   xyz must be 1st 3 values, so that read_restart can test on them
+   layout: buf[0] = record length, then x, tag, type, mask, image, v,
+     followed by data from fixes registered in atom->extra_restart
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
+{
+  // fixed part of the record occupies slots 1..10
+  buf[1] = h_x(i,0);
+  buf[2] = h_x(i,1);
+  buf[3] = h_x(i,2);
+  buf[4] = h_tag[i];
+  buf[5] = h_type[i];
+  buf[6] = h_mask[i];
+  buf[7] = h_image[i];
+  buf[8] = h_v(i,0);
+  buf[9] = h_v(i,1);
+  buf[10] = h_v(i,2);
+
+  // per-fix data follows the fixed part
+  int m = 11;
+  for (int iextra = 0; iextra < atom->nextra_restart; iextra++)
+    m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]);
+
+  buf[0] = m;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   unpack data for one atom from restart file including extra quantities
+   layout matches pack_restart(): buf[0] = record length,
+     then x, tag, type, mask, image, v, then extra per-atom values
+   returns number of doubles consumed, including the length slot
+------------------------------------------------------------------------- */
+
+int AtomVecAtomicKokkos::unpack_restart(double *buf)
+{
+  int nlocal = atom->nlocal;
+  if (nlocal == nmax) {
+    grow(0);
+    // keep the extra-value storage the same length as the atom arrays
+    if (atom->nextra_store)
+      memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
+  }
+
+  int m = 1;
+  h_x(nlocal,0) = buf[m++];
+  h_x(nlocal,1) = buf[m++];
+  h_x(nlocal,2) = buf[m++];
+  h_tag[nlocal] = static_cast<int> (buf[m++]);
+  h_type[nlocal] = static_cast<int> (buf[m++]);
+  h_mask[nlocal] = static_cast<int> (buf[m++]);
+  // pack_restart() stores image as a numeric double, so convert it back
+  // numerically instead of reinterpreting the bits of the double
+  h_image[nlocal] = static_cast<tagint> (buf[m++]);
+  h_v(nlocal,0) = buf[m++];
+  h_v(nlocal,1) = buf[m++];
+  h_v(nlocal,2) = buf[m++];
+
+  // anything beyond the fixed part is stashed in atom->extra
+  double **extra = atom->extra;
+  if (atom->nextra_store) {
+    int size = static_cast<int> (buf[0]) - m;
+    for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
+  }
+
+  atom->nlocal++;
+  return m;
+}
+
+/* ----------------------------------------------------------------------
+   create one atom of itype at coord and append it as a new local atom
+   tag = 0, mask = 1, velocity = 0, image flags = IMGMAX in all 3 fields
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::create_atom(int itype, double *coord)
+{
+  const int ilocal = atom->nlocal;
+
+  // all host data is flagged as modified before growing and again after
+  if (ilocal == nmax) {
+    atomKK->modified(Host,ALL_MASK);
+    grow(0);
+  }
+  atomKK->modified(Host,ALL_MASK);
+
+  for (int d = 0; d < 3; ++d) {
+    h_x(ilocal,d) = coord[d];
+    h_v(ilocal,d) = 0.0;
+  }
+
+  tag[ilocal] = 0;
+  type[ilocal] = itype;
+  h_mask[ilocal] = 1;
+  h_image[ilocal] = ((tagint) IMGMAX << IMG2BITS) |
+    ((tagint) IMGMAX << IMGBITS) | IMGMAX;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   unpack one line from Atoms section of data file
+   values[0] = atom ID, values[1] = atom type, both must be valid
+   mask is set to 1 and velocity to zero
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::data_atom(double *coord, tagint imagetmp,
+                                    char **values)
+{
+  const int ilocal = atom->nlocal;
+  if (ilocal == nmax) grow(0);
+
+  // atom ID must be positive
+  h_tag[ilocal] = atoi(values[0]);
+  if (!(tag[ilocal] > 0))
+    error->one(FLERR,"Invalid atom ID in Atoms section of data file");
+
+  // atom type must lie in 1..ntypes
+  h_type[ilocal] = atoi(values[1]);
+  if (!(type[ilocal] > 0 && type[ilocal] <= atom->ntypes))
+    error->one(FLERR,"Invalid atom type in Atoms section of data file");
+
+  for (int d = 0; d < 3; ++d) {
+    h_x(ilocal,d) = coord[d];
+    h_v(ilocal,d) = 0.0;
+  }
+
+  h_image[ilocal] = imagetmp;
+  h_mask[ilocal] = 1;
+
+  atom->nlocal++;
+}
+
+/* ----------------------------------------------------------------------
+   pack atom info for data file including 3 image flags
+   row i of buf = tag, type, x, y, z, then the 3 image flags
+     decoded from the packed image value and offset by IMGMAX
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::pack_data(double **buf)
+{
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; ++i) {
+    double *row = buf[i];
+    row[0] = h_tag[i];
+    row[1] = h_type[i];
+    row[2] = h_x(i,0);
+    row[3] = h_x(i,1);
+    row[4] = h_x(i,2);
+    row[5] = (h_image[i] & IMGMASK) - IMGMAX;
+    row[6] = ((h_image[i] >> IMGBITS) & IMGMASK) - IMGMAX;
+    row[7] = (h_image[i] >> IMG2BITS) - IMGMAX;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   write atom info to data file including 3 image flags
+   each of the n rows of buf holds tag, type, x, y, z, ix, iy, iz
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::write_data(FILE *fp, int n, double **buf)
+{
+  for (int i = 0; i < n; i++) {
+    const double *row = buf[i];
+
+    // integer fields were stored as doubles; truncate them back
+    const int id = (int) row[0];
+    const int itype = (int) row[1];
+    const int ix = (int) row[5];
+    const int iy = (int) row[6];
+    const int iz = (int) row[7];
+
+    fprintf(fp,"%d %d %-1.16e %-1.16e %-1.16e %d %d %d\n",
+            id,itype,row[2],row[3],row[4],ix,iy,iz);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   return # of bytes of allocated memory
+   an array is counted only if atom->memcheck() accepts its name
+   f is sized for commKK->nthreads copies
+------------------------------------------------------------------------- */
+
+bigint AtomVecAtomicKokkos::memory_usage()
+{
+  bigint total = 0;
+
+  // per-atom scalars
+  total += atom->memcheck("tag") ? memory->usage(tag,nmax) : 0;
+  total += atom->memcheck("type") ? memory->usage(type,nmax) : 0;
+  total += atom->memcheck("mask") ? memory->usage(mask,nmax) : 0;
+  total += atom->memcheck("image") ? memory->usage(image,nmax) : 0;
+
+  // per-atom 3-vectors
+  total += atom->memcheck("x") ? memory->usage(x,nmax,3) : 0;
+  total += atom->memcheck("v") ? memory->usage(v,nmax,3) : 0;
+  total += atom->memcheck("f") ?
+    memory->usage(f,nmax*commKK->nthreads,3) : 0;
+
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   sync the dual views selected by mask to the requested execution space
+   Device syncs to LMPDeviceType, any other space syncs to LMPHostType
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  const bool to_device = (space == Device);
+
+  if (mask & X_MASK) {
+    if (to_device) atomKK->k_x.sync<LMPDeviceType>();
+    else atomKK->k_x.sync<LMPHostType>();
+  }
+  if (mask & V_MASK) {
+    if (to_device) atomKK->k_v.sync<LMPDeviceType>();
+    else atomKK->k_v.sync<LMPHostType>();
+  }
+  if (mask & F_MASK) {
+    if (to_device) atomKK->k_f.sync<LMPDeviceType>();
+    else atomKK->k_f.sync<LMPHostType>();
+  }
+  if (mask & TAG_MASK) {
+    if (to_device) atomKK->k_tag.sync<LMPDeviceType>();
+    else atomKK->k_tag.sync<LMPHostType>();
+  }
+  if (mask & TYPE_MASK) {
+    if (to_device) atomKK->k_type.sync<LMPDeviceType>();
+    else atomKK->k_type.sync<LMPHostType>();
+  }
+  if (mask & MASK_MASK) {
+    if (to_device) atomKK->k_mask.sync<LMPDeviceType>();
+    else atomKK->k_mask.sync<LMPHostType>();
+  }
+  if (mask & IMAGE_MASK) {
+    if (to_device) atomKK->k_image.sync<LMPDeviceType>();
+    else atomKK->k_image.sync<LMPHostType>();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   flag the dual views selected by mask as modified
+   in the given execution space
+   Device marks LMPDeviceType, any other space marks LMPHostType
+------------------------------------------------------------------------- */
+
+void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  const bool on_device = (space == Device);
+
+  if (mask & X_MASK) {
+    if (on_device) atomKK->k_x.modify<LMPDeviceType>();
+    else atomKK->k_x.modify<LMPHostType>();
+  }
+  if (mask & V_MASK) {
+    if (on_device) atomKK->k_v.modify<LMPDeviceType>();
+    else atomKK->k_v.modify<LMPHostType>();
+  }
+  if (mask & F_MASK) {
+    if (on_device) atomKK->k_f.modify<LMPDeviceType>();
+    else atomKK->k_f.modify<LMPHostType>();
+  }
+  if (mask & TAG_MASK) {
+    if (on_device) atomKK->k_tag.modify<LMPDeviceType>();
+    else atomKK->k_tag.modify<LMPHostType>();
+  }
+  if (mask & TYPE_MASK) {
+    if (on_device) atomKK->k_type.modify<LMPDeviceType>();
+    else atomKK->k_type.modify<LMPHostType>();
+  }
+  if (mask & MASK_MASK) {
+    if (on_device) atomKK->k_mask.modify<LMPDeviceType>();
+    else atomKK->k_mask.modify<LMPHostType>();
+  }
+  if (mask & IMAGE_MASK) {
+    if (on_device) atomKK->k_image.modify<LMPDeviceType>();
+    else atomKK->k_image.modify<LMPHostType>();
+  }
+}
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h
new file mode 100644
index 0000000000..dc96cbb650
--- /dev/null
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.h
@@ -0,0 +1,111 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ATOM_CLASS
+
+AtomStyle(atomic/kk,AtomVecAtomicKokkos)
+
+#else
+
+#ifndef LMP_ATOM_VEC_ATOMIC_KOKKOS_H
+#define LMP_ATOM_VEC_ATOMIC_KOKKOS_H
+
+#include "atom_vec_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecAtomicKokkos : public AtomVecKokkos {  // atom style "atomic/kk": per-atom x, v, f, tag, type, mask, image
+ public:
+  AtomVecAtomicKokkos(class LAMMPS *);
+  virtual ~AtomVecAtomicKokkos() {}
+  void grow(int);
+  void copy(int, int, int);
+  int pack_comm(int, int *, double *, int, int *);  // forward comm: x only; returns # of doubles packed
+  int pack_comm_vel(int, int *, double *, int, int *);  // forward comm: x and v
+  void unpack_comm(int, int, double *);
+  void unpack_comm_vel(int, int, double *);
+  int pack_reverse(int, int, double *);  // reverse comm: forces f
+  void unpack_reverse(int, int *, double *);  // accumulates received forces onto f
+  int pack_border(int, int *, double *, int, int *);  // border comm: x, tag, type, mask
+  int pack_border_vel(int, int *, double *, int, int *);  // border comm: x, tag, type, mask, v
+  void unpack_border(int, int, double *);
+  void unpack_border_vel(int, int, double *);
+  int pack_exchange(int, double *);  // one atom: length, x, v, tag, type, mask, image + fix data
+  int unpack_exchange(double *);  // appends one atom, increments atom->nlocal
+  int size_restart();  // 11 values per local atom + fix data
+  int pack_restart(int, double *);
+  int unpack_restart(double *);  // appends one atom, increments atom->nlocal
+  void create_atom(int, double *);
+  void data_atom(double *, tagint, char **);
+  void pack_data(double **);
+  void write_data(FILE *, int, double **);
+  bigint memory_usage();
+  // variants below operate on Kokkos dual views and run functors on host or device
+  void grow_reset();
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, 
+                       const int & iswap,
+                       const DAT::tdual_xfloat_2d &buf,
+                       const int &pbc_flag, const int pbc[]);
+  void unpack_comm_kokkos(const int &n, const int &nfirst, 
+                          const DAT::tdual_xfloat_2d &buf);
+  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, 
+                     const int & iswap, const int nfirst, 
+                     const int &pbc_flag, const int pbc[]);
+  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, 
+                         DAT::tdual_xfloat_2d buf,int iswap,
+                         int pbc_flag, int *pbc, ExecutionSpace space);
+  void unpack_border_kokkos(const int &n, const int &nfirst, 
+                            const DAT::tdual_xfloat_2d &buf, 
+                            ExecutionSpace space);
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, 
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space, int dim, 
+                           X_FLOAT lo, X_FLOAT hi);
+  int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                             int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
+                             ExecutionSpace space);
+  // sync/modified forward to the atomKK dual views selected by the bit mask
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+
+ protected:
+  int *tag,*type,*mask;  // raw per-atom pointers (presumably aliases of the host views - confirm in grow_reset)
+  tagint *image;
+  double **x,**v,**f;
+  // d_* views are handed to LMPDeviceType functors, h_* views are used by host code
+  DAT::t_int_1d d_tag, d_type, d_mask;
+  HAT::t_int_1d h_tag, h_type, h_mask;
+
+  DAT::t_tagint_1d d_image;
+  HAT::t_tagint_1d h_image;
+
+  DAT::t_x_array d_x;
+  DAT::t_v_array d_v;
+  DAT::t_f_array d_f;
+  HAT::t_x_array h_x;
+  HAT::t_v_array h_v;
+  HAT::t_f_array h_f;
+
+  DAT::tdual_int_1d k_count;  // element 0 is the running atom count in unpack_exchange_kokkos
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
new file mode 100644
index 0000000000..1d9174196a
--- /dev/null
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -0,0 +1,23 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "atom_vec_kokkos.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp), atomKK(NULL), commKK(NULL)
+{
+  kokkosable = 1;   // flag is not declared by AtomVecKokkos itself (inherited)
+}
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
new file mode 100644
index 0000000000..ac651b0b5a
--- /dev/null
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -0,0 +1,76 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_ATOM_VEC_KOKKOS_H
+#define LMP_ATOM_VEC_KOKKOS_H
+
+#include "atom_vec.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class AtomVecKokkos : public AtomVec {
+ public:
+  AtomVecKokkos(class LAMMPS *);
+  virtual ~AtomVecKokkos() {}
+
+  // bookkeeping hooks keyed by execution space and mask; empty in this class
+  virtual void sync(ExecutionSpace space, unsigned int mask) {}
+  virtual void modified(ExecutionSpace space, unsigned int mask) {}
+
+  // forward-comm hooks: these defaults pack nothing and report 0
+  virtual int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
+                             const int & iswap, const int nfirst,
+                             const int &pbc_flag, const int pbc[])
+  { return 0; }
+  virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
+                               const int & iswap, const DAT::tdual_xfloat_2d &buf,
+                               const int &pbc_flag, const int pbc[])
+  { return 0; }
+  virtual void unpack_comm_kokkos(const int &n, const int &nfirst,
+                                  const DAT::tdual_xfloat_2d &buf) {}
+
+  // border-comm hooks: these defaults do nothing, the pack returns 0
+  virtual int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
+                                 DAT::tdual_xfloat_2d buf, int iswap,
+                                 int pbc_flag, int *pbc, ExecutionSpace space)
+  { return 0; }
+  virtual void unpack_border_kokkos(const int &n, const int &nfirst,
+                                    const DAT::tdual_xfloat_2d &buf,
+                                    ExecutionSpace space) {}
+
+  // exchange hooks: these defaults do nothing and return 0
+  virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
+                                   DAT::tdual_int_1d k_sendlist,
+                                   DAT::tdual_int_1d k_copylist,
+                                   ExecutionSpace space, int dim,
+                                   X_FLOAT lo, X_FLOAT hi)
+  { return 0; }
+  virtual int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
+                                     int nlocal, int dim,
+                                     X_FLOAT lo, X_FLOAT hi,
+                                     ExecutionSpace space)
+  { return 0; }
+
+ protected:
+  class AtomKokkos *atomKK;
+  class CommKokkos *commKK;
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
new file mode 100644
index 0000000000..5211d11a02
--- /dev/null
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -0,0 +1,820 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "comm_kokkos.h"
+#include "kokkos.h"
+#include "atom.h"
+#include "atom_kokkos.h"
+#include "atom_vec.h"
+#include "atom_vec_kokkos.h"
+#include "domain.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "memory.h"
+
+using namespace LAMMPS_NS;
+
+#define BUFFACTOR 1.5   // multiplier applied to n when a buffer or list is regrown
+#define BUFMIN 10000    // starting value of maxsend, maxrecv and the sendlist width
+#define BUFEXTRA 1000   // slack added to maxsend/maxrecv when sizing the buffers
+
+enum{SINGLE,MULTI};     // values the comm style member is compared against
+
+/* ----------------------------------------------------------------------
+   setup MPI and allocate buffer space
+------------------------------------------------------------------------- */
+
+CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
+{
+  typedef ArrayTypes<LMPDeviceType> KKArrays;   // shorthand for the view types
+
+  // drop the inherited sendlist pointer and start from an empty dual view
+  // NOTE(review): open question from the original author - does the list
+  //   allocated by the parent class have to be freed first?
+  sendlist = NULL;
+  k_sendlist = KKArrays::tdual_int_2d();
+
+  // error check for disallow of OpenMP threads?
+
+  // send/recv buffers are 6 columns wide; buf_send/buf_recv alias host views
+  maxsend = BUFMIN;
+  k_buf_send =
+    KKArrays::tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
+  buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
+
+  maxrecv = BUFMIN;
+  k_buf_recv = KKArrays::tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
+  buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
+
+  // work arrays used by exchange_device()
+  k_exchange_sendlist = KKArrays::tdual_int_1d("comm:k_exchange_sendlist",100);
+  k_exchange_copylist = KKArrays::tdual_int_1d("comm:k_exchange_copylist",100);
+  k_count = KKArrays::tdual_int_1d("comm:k_count",1);
+  k_sendflag = KKArrays::tdual_int_1d("comm:k_sendflag",100);
+
+  // per-swap list capacities, then the 2d sendlist itself
+  // NOTE(review): the original author marked the maxsendlist create as "bogus?"
+  memory->create(maxsendlist,maxswap,"comm:maxsendlist");
+  for (int iswap = 0; iswap < maxswap; iswap++) maxsendlist[iswap] = BUFMIN;
+  memory->create_kokkos(k_sendlist,sendlist,maxswap,BUFMIN,"comm:sendlist");
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommKokkos::~CommKokkos()
+{
+  memory->destroy_kokkos(k_sendlist,sendlist);   // send-list dual view + host rows
+  memory->destroy_kokkos(k_buf_send,buf_send);   // send buffer view + host alias
+  memory->destroy_kokkos(k_buf_recv,buf_recv);   // recv buffer view + host alias
+}
+
+/* ---------------------------------------------------------------------- */
+
+void CommKokkos::init()
+{
+  atomKK = (AtomKokkos *) atom;   // C-style cast, not checked at run time
+  exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
+  forward_comm_classic = lmp->kokkos->forward_comm_classic;
+  exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
+  forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
+  // flags cached above pick the code path in forward_comm/exchange/borders
+  CommBrick::init();
+}
+
+/* ----------------------------------------------------------------------
+   forward communication of atom coords every timestep
+   other per-atom attributes may also be sent via pack/unpack routines
+------------------------------------------------------------------------- */
+
+void CommKokkos::forward_comm(int dummy)
+{
+  if (forward_comm_classic) {
+    // classic path: make the send lists and the communicated atom data
+    // current on the host, flag that data as host-modified, then let
+    // CommBrick do the actual communication
+
+    k_sendlist.sync<LMPHostType>();
+
+    unsigned int datamask = ALL_MASK;
+    if (comm_x_only) datamask = X_MASK;
+    else if (ghost_velocity) datamask = X_MASK | V_MASK;
+
+    atomKK->sync(Host,datamask);
+    atomKK->modified(Host,datamask);
+    CommBrick::forward_comm(dummy);
+    return;
+  }
+
+  // Kokkos path: templated version, instantiated for host or device
+
+  if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
+  else forward_comm_device<LMPDeviceType>(dummy);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void CommKokkos::forward_comm_device(int dummy)
+{
+  int n;
+  MPI_Request request;
+  MPI_Status status;
+  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
+  double **x = atom->x;
+  double *buf;
+  // DeviceType picks which side of each dual view is handed to pack and MPI
+  // exchange data with another proc
+  // if other proc is self, just copy
+  // if comm_x_only set, exchange or copy directly to x, don't unpack
+
+  k_sendlist.sync<DeviceType>();
+
+  for (int iswap = 0; iswap < nswap; iswap++) {
+
+    if (sendproc[iswap] != me) {
+      if (comm_x_only) {
+        atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
+        if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
+        else buf = NULL;
+        // buf set above is overwritten below whenever a receive is posted
+        if (size_forward_recv[iswap]) {
+            buf = atomKK->k_x.view<DeviceType>().ptr_on_device() + 
+              firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
+            MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
+                    recvproc[iswap],0,world,&request);
+        }
+        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
+                                   iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        // n is the value returned by the pack call; nothing is sent when it is 0
+        if (n) {
+          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
+                   n,MPI_DOUBLE,sendproc[iswap],0,world);
+        }
+        // received coords landed directly in k_x, so flag x as modified
+        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
+        atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
+                         space,X_MASK);
+      } else if (ghost_velocity) {
+        error->all(FLERR,"Ghost velocity forward comm not yet "
+                   "implemented with Kokkos");
+        if (size_forward_recv[iswap])   // NOTE(review): presumably unreachable
+          MPI_Irecv(k_buf_recv.view<LMPHostType>().ptr_on_device(),
+                    size_forward_recv[iswap],MPI_DOUBLE,
+                    recvproc[iswap],0,world,&request);
+        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
+                                buf_send,pbc_flag[iswap],pbc[iswap]);
+        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
+        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
+        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
+      } else {
+        if (size_forward_recv[iswap])   // general case: recv into k_buf_recv
+          MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
+                    size_forward_recv[iswap],MPI_DOUBLE,
+                    recvproc[iswap],0,world,&request);
+        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                   k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        if (n)
+          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
+                   MPI_DOUBLE,sendproc[iswap],0,world);
+        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
+        avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
+      }
+      // swap partner is this proc: no MPI calls are made in the branch below
+    } else {
+      if (!ghost_velocity) {
+        if (sendnum[iswap])
+          n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
+                                   firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
+      } else if (ghost_velocity) {
+        error->all(FLERR,"Ghost velocity forward comm not yet "
+                   "implemented with Kokkos");
+        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
+                                buf_send,pbc_flag[iswap],pbc[iswap]);
+        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   exchange: move atoms to correct processors
+   atoms exchanged with all 6 stencil neighbors
+   send out atoms that have left my box, receive ones entering my box
+   atoms will be lost if not inside some proc's box
+     can happen if atom moves outside of non-periodic boundary
+     or if atom moves more than one proc away
+   this routine called before every reneighboring
+   for triclinic, atoms must be in lamda coords (0-1) before exchange is called
+------------------------------------------------------------------------- */
+
+void CommKokkos::exchange()
+{
+  if (exchange_comm_classic) {
+    // classic path: all atom data current on the host and flagged as
+    // host-modified before CommBrick moves the atoms
+    atomKK->sync(Host,ALL_MASK);
+    atomKK->modified(Host,ALL_MASK);
+    CommBrick::exchange();
+    return;
+  }
+  if (exchange_comm_on_host) exchange_device<LMPHostType>();
+  else exchange_device<LMPDeviceType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct BuildExchangeListFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  X_FLOAT _lo,_hi;                    // atoms with _lo <= coord < _hi stay
+  typename AT::t_x_array _x;          // coordinates, indexed (atom,dim)
+  int _nlocal,_dim;                   // _nlocal is stored but not read here
+  typename AT::t_int_1d _nsend;       // element 0 counts leaving atoms
+  typename AT::t_int_1d _sendlist;    // indices of leaving atoms
+  typename AT::t_int_1d _sendflag;    // per atom: 1 = listed, 0 = stays
+
+  BuildExchangeListFunctor(
+      const typename AT::tdual_x_array x,
+      const typename AT::tdual_int_1d sendlist,
+      typename AT::tdual_int_1d nsend,
+      typename AT::tdual_int_1d sendflag,int nlocal, int dim,
+      X_FLOAT lo, X_FLOAT hi):
+    _lo(lo),_hi(hi),
+    _x(x.template view<DeviceType>()),
+    _nlocal(nlocal),_dim(dim),
+    _nsend(nsend.template view<DeviceType>()),
+    _sendlist(sendlist.template view<DeviceType>()),
+    _sendflag(sendflag.template view<DeviceType>()) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (int i) const {
+    const bool staying = !(_x(i,_dim) < _lo || _x(i,_dim) >= _hi);
+    if (staying) {
+      _sendflag(i) = 0;
+      return;
+    }
+    // claim a slot; past the end of the list only the count is advanced
+    const int slot = Kokkos::atomic_fetch_add(&_nsend(0),1);
+    if (slot < _sendlist.dimension_0()) {
+      _sendlist(slot) = i;
+      _sendflag(i) = 1;
+    }
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void CommKokkos::exchange_device()
+{
+  int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
+  double lo,hi,value;
+  double **x;
+  double *sublo,*subhi,*buf;
+  MPI_Request request;
+  MPI_Status status;
+  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
+  // DeviceType selects where the list kernel and pack/unpack calls run
+  // clear global->local map for owned and ghost atoms
+  // b/c atoms migrate to new procs in exchange() and
+  //   new ghosts are created in borders()
+  // map_set() is done at end of borders()
+  // clear ghost count and any ghost bonus data internal to AtomVec
+
+  if (map_style) atom->map_clear();
+  atom->nghost = 0;
+  atom->avec->clear_bonus();
+
+  // subbox bounds for orthogonal or triclinic
+
+  if (triclinic == 0) {
+    sublo = domain->sublo;
+    subhi = domain->subhi;
+  } else {
+    sublo = domain->sublo_lamda;
+    subhi = domain->subhi_lamda;
+  }
+
+  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
+
+  // loop over dimensions
+  for (int dim = 0; dim < 3; dim++) {
+
+    // fill buffer with atoms leaving my box, using < and >=
+    // when atom is deleted, fill it in with last atom
+
+    x = atom->x;
+    lo = sublo[dim];
+    hi = subhi[dim];
+    nlocal = atom->nlocal;
+    i = nsend = 0;
+
+    if (true) {   // always taken; the else branch further down is dead code
+      if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
+      k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
+      while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
+        k_count.h_view(0) = 0;   // loop repeats until the list was big enough
+        k_count.modify<LMPHostType>();
+        k_count.sync<DeviceType>();
+
+        BuildExchangeListFunctor<DeviceType> 
+          f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
+            nlocal,dim,lo,hi);
+        Kokkos::parallel_for(nlocal,f);
+        DeviceType::fence();
+        k_exchange_sendlist.modify<DeviceType>();
+        k_sendflag.modify<DeviceType>();
+        k_count.modify<DeviceType>();
+
+        k_count.sync<LMPHostType>();
+        if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
+          k_exchange_sendlist.resize(k_count.h_view(0)*1.1);   // 10% headroom
+          k_exchange_copylist.resize(k_count.h_view(0)*1.1);
+          k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
+        }
+      }
+      k_exchange_sendlist.sync<LMPHostType>();
+      k_sendflag.sync<LMPHostType>();
+      // each leaving atom below the new nlocal gets a staying atom from the tail
+      int sendpos = nlocal-1;
+      nlocal -= k_count.h_view(0);
+      for(int i = 0; i < k_count.h_view(0); i++) {
+        if (k_exchange_sendlist.h_view(i)<nlocal) {
+          while (k_sendflag.h_view(sendpos)) sendpos--;   // skip leaving atoms
+          k_exchange_copylist.h_view(i) = sendpos;
+          sendpos--;
+        } else
+        k_exchange_copylist.h_view(i) = -1;   // slot lies beyond the new nlocal
+      }
+
+      k_exchange_copylist.modify<LMPHostType>();
+      k_exchange_copylist.sync<DeviceType>();
+      nsend = 
+        avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
+                                   k_exchange_sendlist,k_exchange_copylist,
+                                   ExecutionSpaceFromDevice<DeviceType>::
+                                   space,dim,lo,hi);
+      DeviceType::fence();
+
+    } else {
+      while (i < nlocal) {
+        if (x[i][dim] < lo || x[i][dim] >= hi) {
+          if (nsend > maxsend) grow_send_kokkos(nsend,1);
+          nsend += avec->pack_exchange(i,&buf_send[nsend]);
+          avec->copy(nlocal-1,i,1);
+          nlocal--;
+        } else i++;
+      }
+    }
+    atom->nlocal = nlocal;
+
+    // send/recv atoms in both directions
+    // if 1 proc in dimension, no send/recv, set recv buf to send buf
+    // if 2 procs in dimension, single send/recv
+    // if more than 2 procs in dimension, send/recv to both neighbors
+
+    if (procgrid[dim] == 1) {
+      nrecv = nsend;
+      buf = buf_send;
+      if (nrecv) {
+        atom->nlocal=avec->
+          unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
+                                 ExecutionSpaceFromDevice<DeviceType>::space);
+        DeviceType::fence();
+      }
+    } else {
+      MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
+                   &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
+      nrecv = nrecv1;
+      if (procgrid[dim] > 2) {   // nrecv2 is only set and used in this case
+        MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
+                     &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
+        nrecv += nrecv2;
+      }
+      if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
+
+      MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),nrecv1,
+                MPI_DOUBLE,procneigh[dim][1],0,
+                world,&request);
+      MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
+               MPI_DOUBLE,procneigh[dim][0],0,world);
+      MPI_Wait(&request,&status);
+
+      if (procgrid[dim] > 2) {   // second message is stored after the first
+        MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device()+nrecv1,
+                  nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
+                  world,&request);
+        MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
+                 MPI_DOUBLE,procneigh[dim][1],0,world);
+        MPI_Wait(&request,&status);
+      }
+
+      buf = buf_recv;
+      if (nrecv) {
+        atom->nlocal = avec->
+          unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
+                                 ExecutionSpaceFromDevice<DeviceType>::space);
+        DeviceType::fence();
+      }
+    }
+
+    // NOTE(review): no in-box check of incoming atoms is done here; presumably
+    // unpack_exchange_kokkos does it, since it is passed dim, lo and hi
+
+  }
+
+  atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
+
+  if (atom->firstgroupname) {
+    /* this is not yet implemented with Kokkos */
+    atomKK->sync(Host,ALL_MASK);
+    atom->first_reorder();
+    atomKK->modified(Host,ALL_MASK);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   borders: list nearby atoms to send to neighboring procs at every timestep
+   one list is created for every swap that will be made
+   as list is made, actually do swaps
+   this does equivalent of a communicate, so don't need to explicitly
+     call communicate routine on reneighboring timestep
+   this routine is called before every reneighboring
+   for triclinic, atoms must be in lamda coords (0-1) before borders is called
+------------------------------------------------------------------------- */
+
+void CommKokkos::borders()
+{
+  // note: the path is chosen by the exchange_comm_* flags
+  if (exchange_comm_classic) {
+    // classic path: sync atom data to the host, then flag the send list
+    // and the atom data as host-modified before CommBrick::borders runs
+    atomKK->sync(Host,ALL_MASK);
+    k_sendlist.modify<LMPHostType>();
+    atomKK->modified(Host,ALL_MASK);
+    CommBrick::borders();
+    return;
+  }
+  if (exchange_comm_on_host) borders_device<LMPHostType>();
+  else borders_device<LMPDeviceType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct BuildBorderListFunctor {
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  X_FLOAT lo,hi;                     // slab bounds, both inclusive
+  typename AT::t_x_array x;          // coordinates, indexed (atom,dim)
+  int iswap,maxsendlist;             // sendlist row and its capacity
+  int nfirst,nlast,dim;              // atoms [nfirst,nlast) are tested along dim
+  typename AT::t_int_2d sendlist;
+  typename AT::t_int_1d nsend;       // element 0 is handed to team_scan
+
+  BuildBorderListFunctor(typename AT::tdual_x_array _x,
+                         typename AT::tdual_int_2d _sendlist,
+                         typename AT::tdual_int_1d _nsend,int _nfirst,
+                         int _nlast, int _dim,
+                         X_FLOAT _lo, X_FLOAT _hi, int _iswap,
+                         int _maxsendlist):
+    lo(_lo),hi(_hi),
+    x(_x.template view<DeviceType>()),
+    iswap(_iswap),maxsendlist(_maxsendlist),
+    nfirst(_nfirst),nlast(_nlast),dim(_dim),
+    sendlist(_sendlist.template view<DeviceType>()),
+    nsend(_nsend.template view<DeviceType>()) {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (DeviceType dev) const {
+    const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) /
+                       dev.league_size());
+    const int teamstart = chunk*dev.league_rank() + nfirst;
+    int teamend = teamstart + chunk;
+    if (teamend >= nlast) teamend = nlast;
+    // pass 1: count this thread's atoms inside the slab
+    int mysend = 0;
+    for (int i = teamstart + dev.team_rank(); i < teamend; i += dev.team_size())
+      if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
+
+    // the scan result is used as this thread's first slot in the list
+    const int my_store_pos = dev.team_scan(mysend,&nsend(0));
+    if (my_store_pos+mysend >= maxsendlist) return;   // no room: count only
+
+    // pass 2: store the same atoms, starting at that slot
+    int slot = my_store_pos;
+    for (int i = teamstart + dev.team_rank(); i < teamend; i += dev.team_size())
+      if (x(i,dim) >= lo && x(i,dim) <= hi) sendlist(iswap,slot++) = i;
+  }
+
+  size_t shmem_size() const { return 1000u;}
+};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void CommKokkos::borders_device() {
+  int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
+  int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
+  double lo,hi;
+  int *type;
+  double **x;
+  double *buf,*mlo,*mhi;
+  MPI_Request request;
+  MPI_Status status;
+  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
+  // builds the per-swap send lists in DeviceType's space and swaps border atoms
+  ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  k_sendlist.modify<DeviceType>();
+  atomKK->sync(exec_space,ALL_MASK);
+
+  // do swaps over all 3 dimensions
+
+  iswap = 0;
+  smax = rmax = 0;
+
+  for (dim = 0; dim < 3; dim++) {
+    nlast = 0;
+    twoneed = 2*maxneed[dim];
+    for (ineed = 0; ineed < twoneed; ineed++) {
+
+      // find atoms within slab boundaries lo/hi using <= and >=
+      // check atoms between nfirst and nlast
+      //   for first swaps in a dim, check owned and ghost
+      //   for later swaps in a dim, only check newly arrived ghosts
+      // store sent atom indices in list for use in future timesteps
+
+      x = atom->x;
+      if (style == SINGLE) {
+        lo = slablo[iswap];
+        hi = slabhi[iswap];
+      } else {
+        type = atom->type;
+        mlo = multilo[iswap];
+        mhi = multihi[iswap];
+      }
+      if (ineed % 2 == 0) {
+        nfirst = nlast;
+        nlast = atom->nlocal + atom->nghost;
+      }
+
+      nsend = 0;
+
+      // sendflag = 0 if I do not send on this swap
+      // sendneed test indicates receiver no longer requires data
+      // e.g. due to non-PBC or non-uniform sub-domains
+
+      if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
+      else sendflag = 1;
+
+      // find send atoms according to SINGLE vs MULTI
+      // all atoms eligible versus atoms in bordergroup
+      // only need to limit loop to bordergroup for first sends (ineed < 2)
+      // on these sends, break loop in two: owned (in group) and ghost
+
+      if (sendflag) {
+        if (!bordergroup || ineed >= 2) {
+          if (style == SINGLE) {
+            typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
+            total_send.h_view(0) = 0;
+            if(exec_space == Device) {
+              total_send.template modify<LMPHostType>();   // host copy was written
+              total_send.template sync<LMPDeviceType>();
+            }
+            BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
+                total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
+            Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
+            Kokkos::parallel_for(config,f);
+            DeviceType::fence();
+            total_send.template modify<DeviceType>();
+            total_send.template sync<LMPHostType>();
+            // list too small for the count: grow it and rerun the kernel once
+            if(total_send.h_view(0) >= maxsendlist[iswap]) {
+              grow_list(iswap,total_send.h_view(0));
+              total_send.h_view(0) = 0;
+              if(exec_space == Device) {
+                total_send.template modify<LMPHostType>();
+                total_send.template sync<LMPDeviceType>();
+              }
+              BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
+                  total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
+              Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
+              Kokkos::parallel_for(config,f);
+              DeviceType::fence();
+              total_send.template modify<DeviceType>();
+              total_send.template sync<LMPHostType>();
+            }
+            nsend = total_send.h_view(0);
+          } else {
+            error->all(FLERR,"Required border comm not yet "
+                       "implemented with Kokkos\n");
+            for (i = nfirst; i < nlast; i++) {
+              itype = type[i];
+              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
+                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
+                sendlist[iswap][nsend++] = i;
+              }
+            }
+          }
+
+        } else {
+          error->all(FLERR,"Required border comm not yet "
+                     "implemented with Kokkos\n");
+          if (style == SINGLE) {
+            ngroup = atom->nfirst;
+            for (i = 0; i < ngroup; i++)
+              if (x[i][dim] >= lo && x[i][dim] <= hi) {
+                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
+                sendlist[iswap][nsend++] = i;
+              }
+            for (i = atom->nlocal; i < nlast; i++)
+              if (x[i][dim] >= lo && x[i][dim] <= hi) {
+                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
+                sendlist[iswap][nsend++] = i;
+              }
+          } else {
+            ngroup = atom->nfirst;
+            for (i = 0; i < ngroup; i++) {
+              itype = type[i];
+              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
+                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
+                sendlist[iswap][nsend++] = i;
+              }
+            }
+            for (i = atom->nlocal; i < nlast; i++) {
+              itype = type[i];
+              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
+                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
+                sendlist[iswap][nsend++] = i;
+              }
+            }
+          }
+        }
+      }
+
+      // pack up list of border atoms
+
+      if (nsend*size_border > maxsend)
+        grow_send_kokkos(nsend*size_border,0);
+      if (ghost_velocity) {
+        error->all(FLERR,"Required border comm not yet "
+                   "implemented with Kokkos\n");
+        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
+                                  pbc_flag[iswap],pbc[iswap]);
+      }
+      else
+        n = avec->
+          pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
+                             pbc_flag[iswap],pbc[iswap],exec_space);
+
+      // swap atoms with other proc
+      // no MPI calls except SendRecv if nsend/nrecv = 0
+      // put incoming ghosts at end of my atom arrays
+      // if swapping with self, simply copy, no messages
+
+      if (sendproc[iswap] != me) {
+        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
+                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
+        if (nrecv*size_border > maxrecv) grow_recv_kokkos(nrecv*size_border);
+        if (nrecv) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
+                             nrecv*size_border,MPI_DOUBLE,
+                             recvproc[iswap],0,world,&request);
+        if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
+                        MPI_DOUBLE,sendproc[iswap],0,world);
+        if (nrecv) MPI_Wait(&request,&status);
+        buf = buf_recv;
+      } else {
+        nrecv = nsend;
+        buf = buf_send;
+      }
+
+      // unpack buffer
+
+      if (ghost_velocity) {
+        error->all(FLERR,"Required border comm not yet "
+                   "implemented with Kokkos\n");
+        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
+      }
+      else
+        if (sendproc[iswap] != me)
+          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
+                                     k_buf_recv,exec_space);
+        else
+          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
+                                     k_buf_send,exec_space);
+
+      // set all pointers & counters
+
+      smax = MAX(smax,nsend);
+      rmax = MAX(rmax,nrecv);
+      sendnum[iswap] = nsend;
+      recvnum[iswap] = nrecv;
+      size_forward_recv[iswap] = nrecv*size_forward;
+      size_reverse_send[iswap] = nrecv*size_reverse;
+      size_reverse_recv[iswap] = nsend*size_reverse;
+      firstrecv[iswap] = atom->nlocal + atom->nghost;
+      atom->nghost += nrecv;
+      iswap++;
+    }
+  }
+
+  // ensure send/recv buffers are long enough for all forward & reverse comm
+
+  int max = MAX(maxforward*smax,maxreverse*rmax);
+  if (max > maxsend) grow_send_kokkos(max,0);
+  max = MAX(maxforward*rmax,maxreverse*smax);
+  if (max > maxrecv) grow_recv_kokkos(max);
+
+  // reset global->local map
+
+  if (map_style) atom->map_set();
+  if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
+  atomKK->modified(exec_space,ALL_MASK);
+  DeviceType::fence();
+}
+
+/* ----------------------------------------------------------------------
+   realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
+   if flag = 1, realloc
+   if flag = 0, don't need to realloc with copy, just free/malloc
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
+{
+  maxsend = static_cast<int> (BUFFACTOR * n);
+  int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
+
+  if (!flag) {
+    // contents are not needed: replace the dual view with a fresh one
+    k_buf_send = ArrayTypes<LMPDeviceType>::
+      tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
+  } else {
+    // contents are kept: mark the side named by 'space' as modified,
+    // then resize the existing dual view
+    if (space == Device) k_buf_send.modify<LMPDeviceType>();
+    else k_buf_send.modify<LMPHostType>();
+    k_buf_send.resize(maxsend_border,atom->avec->size_border);
+  }
+
+  buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();  // refresh host alias
+}
+
+/* ----------------------------------------------------------------------
+   free/malloc the size of the recv buffer as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
+{
+  const int ncols = atom->avec->size_border;   // second dimension of the buffer
+  maxrecv = static_cast<int> (BUFFACTOR * n);
+  const int nrows = (maxrecv+BUFEXTRA+5)/ncols + 2;
+  k_buf_recv = ArrayTypes<LMPDeviceType>::tdual_xfloat_2d("comm:k_buf_recv",nrows,ncols);
+  buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();   // refresh host alias
+}
+
+/* ----------------------------------------------------------------------
+   realloc the size of the iswap sendlist as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_list(int iswap, int n)
+{
+  // all maxswap rows are regrown to the same width; iswap itself is not read
+  int newlen = static_cast<int> (BUFFACTOR * n);
+  memory->grow_kokkos(k_sendlist,sendlist,maxswap,newlen,"comm:sendlist");
+  for (int jswap = 0; jswap < maxswap; jswap++) {
+    maxsendlist[jswap] = newlen;
+    sendlist[jswap] = &k_sendlist.view<LMPHostType>()(jswap,0);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   realloc the buffers needed for swaps
+------------------------------------------------------------------------- */
+
+void CommKokkos::grow_swap(int n)
+{
+  // per-swap arrays: free_* is called, then allocate_* for n swaps
+  free_swap();
+  allocate_swap(n);
+  if (style == MULTI) {
+    free_multi();
+    allocate_multi(n);
+  }
+  maxswap = n;
+
+  // list width: current device-view width, but never below BUFMIN
+  int width = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
+  memory->grow_kokkos(k_sendlist,sendlist,maxswap,width,"comm:sendlist");
+  memory->grow(maxsendlist,n,"comm:maxsendlist");
+  for (int iswap = 0; iswap < maxswap; iswap++) maxsendlist[iswap] = width;
+}
diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h
new file mode 100644
index 0000000000..46d3552d2d
--- /dev/null
+++ b/src/KOKKOS/comm_kokkos.h
@@ -0,0 +1,63 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_COMM_KOKKOS_H
+#define LMP_COMM_KOKKOS_H
+
+#include "comm_brick.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class CommKokkos : public CommBrick {
+ public:
+  class AtomKokkos *atomKK;
+  // NOTE(review): presumably set from the KokkosLMP flags of the same names
+  bool exchange_comm_classic;
+  bool forward_comm_classic;
+  bool exchange_comm_on_host;
+  bool forward_comm_on_host;
+
+  CommKokkos(class LAMMPS *);
+  ~CommKokkos();
+  void init();
+
+  void forward_comm(int dummy = 0);    // forward comm of atom coords
+  void exchange();                     // move atoms to new procs
+  void borders();                      // setup list of atoms to comm
+  // presumably the per-device-type variants of the three calls above
+  template<class DeviceType> void forward_comm_device(int dummy);
+  template<class DeviceType> void exchange_device();
+  template<class DeviceType> void borders_device();
+
+ protected:
+  DAT::tdual_int_2d k_sendlist;        // rows back sendlist[] (see grow_list)
+  DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;  // back buf_send / buf_recv
+  DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
+  DAT::tdual_int_1d k_count;
+  //double *buf_send;                 // send buffer for all comm
+  //double *buf_recv;                 // recv buffer for all comm
+
+  void grow_send_kokkos(int, int, ExecutionSpace space = Host);
+  void grow_recv_kokkos(int, ExecutionSpace space = Host);
+  void grow_list(int, int);            // grows every swap's list together
+  void grow_swap(int);
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp
new file mode 100644
index 0000000000..c2214b611b
--- /dev/null
+++ b/src/KOKKOS/domain_kokkos.cpp
@@ -0,0 +1,207 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "domain_kokkos.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+DomainKokkos::DomainKokkos(LAMMPS *lmp) : Domain(lmp), atomKK(NULL) {} // set in init()
+
+/* ---------------------------------------------------------------------- */
+
+void DomainKokkos::init()
+{
+  atomKK = (AtomKokkos *) atom;   // keep the atom pointer under its Kokkos type
+  Domain::init();                 // then run the base-class setup
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType, int PERIODIC, int DEFORM_VREMAP>
+struct DomainPBCFunctor {   // per-atom periodic wrap of x, v and image flags
+  typedef DeviceType device_type;
+  double lo[3],hi[3],period[3];   // private copies of the ctor arguments
+  typename ArrayTypes<DeviceType>::t_x_array x;
+  typename ArrayTypes<DeviceType>::t_v_array v;
+  typename ArrayTypes<DeviceType>::t_int_1d mask;
+  typename ArrayTypes<DeviceType>::t_int_1d image;
+  int deform_groupbit;            // mask bit that enables the velocity remap
+  double h_rate[6];               // velocity shifts applied when an atom wraps
+  int xperiodic,yperiodic,zperiodic;
+  // small arrays are copied element-wise; views are taken for DeviceType
+  DomainPBCFunctor(double* _lo, double* _hi, double* _period,
+                   DAT::tdual_x_array _x, DAT::tdual_v_array _v,
+                   DAT::tdual_int_1d _mask, DAT::tdual_int_1d _image, 
+                   int _deform_groupbit, double* _h_rate,
+                   int _xperiodic, int _yperiodic, int _zperiodic):
+    x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
+    mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
+    deform_groupbit(_deform_groupbit),
+    xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
+    lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
+    hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
+    period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];
+    h_rate[0]=_h_rate[0]; h_rate[1]=_h_rate[1]; h_rate[2]=_h_rate[2];
+    h_rate[3]=_h_rate[3]; h_rate[4]=_h_rate[4]; h_rate[5]=_h_rate[5];
+  }
+  // wrap atom i; x, y and z are processed in turn, each only if periodic
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int &i) const {
+    if (PERIODIC && xperiodic) {
+      if (x(i,0) < lo[0]) {
+        x(i,0) += period[0];
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) += h_rate[0];
+        int idim = image[i] & IMGMASK;           // x count: lowest field
+        const int otherdims = image[i] ^ idim;   // image with x field cleared
+        idim--;
+        idim &= IMGMASK;                         // wrap around within the field
+        image[i] = otherdims | idim;
+      }
+      if (x(i,0) >= hi[0]) {
+        x(i,0) -= period[0];
+        x(i,0) = MAX(x(i,0),lo[0]);              // x - period may dip below lo
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) -= h_rate[0];
+        int idim = image[i] & IMGMASK;
+        const int otherdims = image[i] ^ idim;
+        idim++;
+        idim &= IMGMASK;
+        image[i] = otherdims | idim;
+      }
+    }
+    // y: same scheme, image field sits IMGBITS up, v shifts by h_rate[5],[1]
+    if (PERIODIC && yperiodic) {
+      if (x(i,1) < lo[1]) {
+        x(i,1) += period[1];
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
+          v(i,0) += h_rate[5];
+          v(i,1) += h_rate[1];
+        }
+        int idim = (image[i] >> IMGBITS) & IMGMASK;
+        const int otherdims = image[i] ^ (idim << IMGBITS);
+        idim--;
+        idim &= IMGMASK;
+        image[i] = otherdims | (idim << IMGBITS);
+      }
+      if (x(i,1) >= hi[1]) {
+        x(i,1) -= period[1];
+        x(i,1) = MAX(x(i,1),lo[1]);
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
+          v(i,0) -= h_rate[5];
+          v(i,1) -= h_rate[1];
+        }
+        int idim = (image[i] >> IMGBITS) & IMGMASK;
+        const int otherdims = image[i] ^ (idim << IMGBITS);
+        idim++;
+        idim &= IMGMASK;
+        image[i] = otherdims | (idim << IMGBITS);
+      }
+    }
+    // z: image field sits IMG2BITS up, v shifts by h_rate[4],[3],[2]
+    if (PERIODIC && zperiodic) {
+      if (x(i,2) < lo[2]) {
+        x(i,2) += period[2];
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
+          v(i,0) += h_rate[4];
+          v(i,1) += h_rate[3];
+          v(i,2) += h_rate[2];
+        }
+        int idim = image[i] >> IMG2BITS;         // top field: no mask applied
+        const int otherdims = image[i] ^ (idim << IMG2BITS);
+        idim--;
+        idim &= IMGMASK;
+        image[i] = otherdims | (idim << IMG2BITS);
+      }
+      if (x(i,2) >= hi[2]) {
+        x(i,2) -= period[2];
+        x(i,2) = MAX(x(i,2),lo[2]);
+        if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
+          v(i,0) -= h_rate[4];
+          v(i,1) -= h_rate[3];
+          v(i,2) -= h_rate[2];
+        }
+        int idim = image[i] >> IMG2BITS;
+        const int otherdims = image[i] ^ (idim << IMG2BITS);
+        idim++;
+        idim &= IMGMASK;
+        image[i] = otherdims | (idim << IMG2BITS);
+      }
+    }
+  }
+};
+
+/* ----------------------------------------------------------------------
+   enforce PBC and modify box image flags for each atom
+   called every reneighboring and by other commands that change atoms
+   resulting coord must satisfy lo <= coord < hi
+   MAX is important since coord - prd < lo can happen when coord = hi
+   if fix deform, remap velocity of fix group atoms by box edge velocities
+   for triclinic, atoms must be in lamda coords (0-1) before pbc is called
+   image = 10 bits for each dimension
+   increment/decrement in wrap-around fashion
+------------------------------------------------------------------------- */
+
+void DomainKokkos::pbc()
+{
+  double *lo,*hi,*period;
+  int nlocal = atomKK->nlocal;
+  // orthogonal boxes wrap in box coords, triclinic boxes in lamda coords
+  if (triclinic == 0) {
+    lo = boxlo;
+    hi = boxhi;
+    period = prd;
+  } else {
+    lo = boxlo_lamda;
+    hi = boxhi_lamda;
+    period = prd_lamda;
+  }
+  // device kernel reads x/v/mask/image and writes x, v and the image flags
+  atomKK->sync(Device,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK);
+  atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK);
+  // template flags compile the wrap / velocity-remap code in or out
+  if (xperiodic || yperiodic || zperiodic) {
+    if (deform_vremap) {
+      DomainPBCFunctor<LMPDeviceType,1,1>
+        f(lo,hi,period,
+          atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
+          deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
+      Kokkos::parallel_for(nlocal,f);
+    } else {
+      DomainPBCFunctor<LMPDeviceType,1,0>
+        f(lo,hi,period,
+          atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
+          deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
+      Kokkos::parallel_for(nlocal,f);
+    }
+  } else {
+    if (deform_vremap) {
+      DomainPBCFunctor<LMPDeviceType,0,1>
+        f(lo,hi,period,
+          atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
+          deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
+      Kokkos::parallel_for(nlocal,f);
+    } else {
+      DomainPBCFunctor<LMPDeviceType,0,0>
+        f(lo,hi,period,
+          atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
+          deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
+      Kokkos::parallel_for(nlocal,f);
+    }
+  }
+
+  LMPDeviceType::fence();   // wait for the kernel before returning
+}
+
diff --git a/src/KOKKOS/domain_kokkos.h b/src/KOKKOS/domain_kokkos.h
new file mode 100644
index 0000000000..36e0aa4aaa
--- /dev/null
+++ b/src/KOKKOS/domain_kokkos.h
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_DOMAIN_KOKKOS_H
+#define LMP_DOMAIN_KOKKOS_H
+
+#include "domain.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class DomainKokkos : public Domain {
+ public:
+  class AtomKokkos *atomKK;   // atom pointer as AtomKokkos, assigned in init()
+
+  DomainKokkos(class LAMMPS *);
+  ~DomainKokkos() {}
+  void init();
+  void pbc();                 // periodic wrap run as a Kokkos parallel_for
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
new file mode 100644
index 0000000000..3076dca4fa
--- /dev/null
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -0,0 +1,177 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdio.h"
+#include "string.h"
+#include "fix_nve_kokkos.h"
+#include "atom_masks.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "update.h"
+#include "respa.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixNVEKokkos<DeviceType>::FixNVEKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixNVE(lmp, narg, arg)
+{
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  // masks later handed to atomKK->sync() / modified() by the integrate steps
+  datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
+  datamask_modify = X_MASK | V_MASK | F_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVEKokkos<DeviceType>::init()
+{
+  FixNVE::init();
+  // mark the host side of the per-type mass view modified, then sync the device
+  atomKK->k_mass.modify<LMPHostType>();
+  atomKK->k_mass.sync<LMPDeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   allow for both per-type and per-atom mass
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
+{
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+  // refresh the members that the per-atom kernels read
+  x = atomKK->k_x.view<DeviceType>();
+  v = atomKK->k_v.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  rmass = atomKK->rmass;   // raw pointer; non-NULL selects the rmass kernel
+  mass = atomKK->k_mass.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atomKK->nlocal;
+  if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
+  // the RMass template flag picks the per-atom or the per-type mass kernel
+  if (rmass) {
+    FixNVEKokkosInitialIntegrateFunctor<DeviceType,1> functor(this);
+    Kokkos::parallel_for(nlocal,functor);
+  } else {
+    FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
+    Kokkos::parallel_for(nlocal,functor);
+  }
+  DeviceType::fence();
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVEKokkos<DeviceType>::initial_integrate_item(int i) const
+{
+  // atoms outside the fix group are left untouched
+  if (!(mask[i] & groupbit)) return;
+
+  // per-type mass: kick the velocity, then drift with the updated velocity
+  const double dtfm = dtf / mass[type[i]];
+  for (int d = 0; d < 3; d++) {
+    v(i,d) += dtfm * f(i,d);
+    x(i,d) += dtv * v(i,d);
+  }
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVEKokkos<DeviceType>::initial_integrate_rmass_item(int i) const
+{
+  if (mask[i] & groupbit) {              // only atoms in the fix group move
+    const double dtfm = dtf / rmass[i];  // per-atom mass: index by atom, not type
+    v(i,0) += dtfm * f(i,0);             // velocity kick from the force
+    v(i,1) += dtfm * f(i,1);
+    v(i,2) += dtfm * f(i,2);
+    x(i,0) += dtv * v(i,0);              // drift with the updated velocity
+    x(i,1) += dtv * v(i,1);
+    x(i,2) += dtv * v(i,2);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVEKokkos<DeviceType>::final_integrate()
+{
+  atomKK->sync(execution_space,datamask_read);
+  atomKK->modified(execution_space,datamask_modify);
+  // refresh the members that the per-atom kernels read (x is not needed here)
+  v = atomKK->k_v.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  rmass = atomKK->rmass;   // raw pointer; non-NULL selects the rmass kernel
+  mass = atomKK->k_mass.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atomKK->nlocal;
+  if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
+  // the RMass template flag picks the per-atom or the per-type mass kernel
+  if (rmass) {
+    FixNVEKokkosFinalIntegrateFunctor<DeviceType,1> functor(this);
+    Kokkos::parallel_for(nlocal,functor);
+  } else {
+    FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
+    Kokkos::parallel_for(nlocal,functor);
+  }
+  DeviceType::fence();
+
+  // debug
+  //atomKK->sync(Host,datamask_read);
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVEKokkos<DeviceType>::final_integrate_item(int i) const
+{
+  // atoms outside the fix group are skipped
+  if (!(mask[i] & groupbit)) return;
+
+  // velocity kick from the current force, scaled by the per-type mass
+  const double dtfm = dtf / mass[type[i]];
+  for (int d = 0; d < 3; d++) v(i,d) += dtfm * f(i,d);
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixNVEKokkos<DeviceType>::final_integrate_rmass_item(int i) const
+{
+  if (mask[i] & groupbit) {              // only atoms in the fix group change
+    const double dtfm = dtf / rmass[i];  // per-atom mass: index by atom, not type
+    v(i,0) += dtfm * f(i,0);             // velocity kick from the force
+    v(i,1) += dtfm * f(i,1);
+    v(i,2) += dtfm * f(i,2);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixNVEKokkos<DeviceType>::cleanup_copy()
+{
+  vatom = NULL;              // detach pointers shared with the copied-from fix
+  style = NULL; id = NULL;   // (presumably still owned there - TODO confirm)
+}
+
+template class FixNVEKokkos<LMPDeviceType>;
+#if DEVICE==2
+template class FixNVEKokkos<LMPHostType>;   // host variant, only for DEVICE==2
+#endif
diff --git a/src/KOKKOS/fix_nve_kokkos.h b/src/KOKKOS/fix_nve_kokkos.h
new file mode 100644
index 0000000000..bd9ec4d816
--- /dev/null
+++ b/src/KOKKOS/fix_nve_kokkos.h
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(nve/kk,FixNVEKokkos<LMPDeviceType>)
+FixStyle(nve/kk/device,FixNVEKokkos<LMPDeviceType>)
+FixStyle(nve/kk/host,FixNVEKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_FIX_NVE_KOKKOS_H
+#define LMP_FIX_NVE_KOKKOS_H
+
+#include "fix_nve.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class FixNVEKokkos;
+
+template <class DeviceType, int RMass>
+class FixNVEKokkosInitialIntegrateFunctor;
+template <class DeviceType, int RMass>
+class FixNVEKokkosFinalIntegrateFunctor;
+
+template<class DeviceType>
+class FixNVEKokkos : public FixNVE {
+ public:
+  FixNVEKokkos(class LAMMPS *, int, char **);
+  ~FixNVEKokkos() {}
+  void cleanup_copy();             // NULLs id, style and vatom of this object
+  void init();
+  void initial_integrate(int);
+  void final_integrate();
+  // per-atom kernel bodies, called once per index by the functors below
+  KOKKOS_INLINE_FUNCTION
+  void initial_integrate_item(int) const;
+  KOKKOS_INLINE_FUNCTION
+  void initial_integrate_rmass_item(int) const;
+  KOKKOS_INLINE_FUNCTION
+  void final_integrate_item(int) const;
+  KOKKOS_INLINE_FUNCTION
+  void final_integrate_rmass_item(int) const;
+
+ private:
+  class AtomKokkos *atomKK;
+  // refreshed from atomKK at the start of each integrate call
+  typename ArrayTypes<DeviceType>::t_x_array x;
+  typename ArrayTypes<DeviceType>::t_v_array v;
+  typename ArrayTypes<DeviceType>::t_f_array_const f;
+  double *rmass;                   // raw per-atom mass pointer, may be NULL
+  typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
+  typename ArrayTypes<DeviceType>::t_int_1d type;
+  typename ArrayTypes<DeviceType>::t_int_1d mask;
+};
+
+template <class DeviceType, int RMass>
+struct FixNVEKokkosInitialIntegrateFunctor {
+  typedef DeviceType device_type;
+  FixNVEKokkos<DeviceType> c;    // by-value copy of the fix used by the kernel
+  // copy the fix, then let cleanup_copy() reset the copy's id/style/vatom
+  FixNVEKokkosInitialIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr) :
+    c(*c_ptr) { c.cleanup_copy(); }
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    if (!RMass) c.initial_integrate_item(i);
+    else c.initial_integrate_rmass_item(i);
+  }
+};
+
+template <class DeviceType, int RMass>
+struct FixNVEKokkosFinalIntegrateFunctor {
+  typedef DeviceType device_type;
+  FixNVEKokkos<DeviceType> c;    // by-value copy of the fix used by the kernel
+  // copy the fix, then let cleanup_copy() reset the copy's id/style/vatom
+  FixNVEKokkosFinalIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr) :
+    c(*c_ptr) { c.cleanup_copy(); }
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    if (!RMass) c.final_integrate_item(i);
+    else c.final_integrate_rmass_item(i);
+  }
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+*/
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
new file mode 100644
index 0000000000..4f6031f229
--- /dev/null
+++ b/src/KOKKOS/kokkos.cpp
@@ -0,0 +1,220 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "stdio.h"
+#include "string.h"
+#include "stdlib.h"
+#include "ctype.h"
+#include "kokkos.h"
+#include "lammps.h"
+#include "neighbor_kokkos.h"
+#include "neigh_list_kokkos.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+enum{FULL,HALFTHREAD,HALF};
+
+/* ---------------------------------------------------------------------- */
+
+KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
+{
+  kokkos_exists = 1;
+  lmp->kokkos = this;
+
+  // process any command-line args that invoke Kokkos settings
+  // recognized: d|device ID, g|gpus N [skipID], t|threads N, n|numa N
+  // any other keyword, or a keyword without its value, is a fatal error
+
+  int device = 0;
+  int num_threads = 1;
+  int numa = 1;
+
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg],"d") == 0 || strcmp(arg[iarg],"device") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
+      device = atoi(arg[iarg+1]);
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg],"g") == 0 ||
+               strcmp(arg[iarg],"gpus") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
+      int ngpu = atoi(arg[iarg+1]);
+      if (ngpu <= 0) error->all(FLERR,"Invalid Kokkos command-line args");
+      iarg += 2;
+
+      // optional numeric arg right after the count: a GPU ID to skip over
+      int skip_gpu = 9999;
+      if (iarg < narg && isdigit(arg[iarg][0])) {
+        skip_gpu = atoi(arg[iarg]);
+        iarg++;
+      }
+
+      // node-local rank exported by the launcher selects the GPU;
+      // if several variables are set, the last one listed wins
+      const char *rankvars[] = {"SLURM_LOCALID","MV2_COMM_WORLD_LOCAL_RANK",
+                                "OMPI_COMM_WORLD_LOCAL_RANK"};
+      for (int ivar = 0; ivar < 3; ivar++) {
+        const char *str = getenv(rankvars[ivar]);
+        if (str == NULL) continue;
+        device = atoi(str) % ngpu;
+        if (device >= skip_gpu) device++;
+      }
+
+    } else if (strcmp(arg[iarg],"t") == 0 ||
+               strcmp(arg[iarg],"threads") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
+      num_threads = atoi(arg[iarg+1]);
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg],"n") == 0 ||
+               strcmp(arg[iarg],"numa") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
+      numa = atoi(arg[iarg+1]);
+      iarg += 2;
+
+    } else error->all(FLERR,"Invalid Kokkos command-line args");
+  }
+
+  // initialize Kokkos
+
+#if DEVICE==2
+  Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
+  Kokkos::Cuda::SelectDevice select_device(device);
+  Kokkos::Cuda::initialize(select_device);
+#else
+  LMPHostType::initialize(num_threads,numa);
+#endif
+
+  // default settings for package kokkos command
+
+  neighflag = FULL;
+  exchange_comm_classic = 0;
+  forward_comm_classic = 0;
+  exchange_comm_on_host = 1;
+  forward_comm_on_host = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+KokkosLMP::~KokkosLMP()
+{
+  // finalize Kokkos in the reverse of the constructor's initialize order
+
+#if DEVICE==2
+  Kokkos::Cuda::finalize();
+  Kokkos::Cuda::host_mirror_device_type::finalize();
+#else
+  LMPHostType::finalize();
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   invoked by package kokkos command
+------------------------------------------------------------------------- */
+
+void KokkosLMP::accelerator(int narg, char **arg)
+{
+  int iarg = 0;
+  while (iarg < narg) {                 // args are keyword/value pairs
+    if (strcmp(arg[iarg],"neigh") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
+      if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL;
+      else if (strcmp(arg[iarg+1],"half/thread") == 0) neighflag = HALFTHREAD;
+      else if (strcmp(arg[iarg+1],"half") == 0) neighflag = HALF;
+      else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2; // NOTE(review): N2 and
+      else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER; // FULLCLUSTER are not in this file's enum - confirm source
+      else error->all(FLERR,"Illegal package command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"comm/exchange") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
+      if (strcmp(arg[iarg+1],"no") == 0) exchange_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"host") == 0) {
+        exchange_comm_classic = 0;
+        exchange_comm_on_host = 1;
+      } else if (strcmp(arg[iarg+1],"device") == 0) {
+        exchange_comm_classic = 0;
+        exchange_comm_on_host = 0;
+      } else error->all(FLERR,"Illegal package command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"comm/forward") == 0) {  // same values as above
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
+      if (strcmp(arg[iarg+1],"no") == 0) forward_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"host") == 0) {
+        forward_comm_classic = 0;
+        forward_comm_on_host = 1;
+      } else if (strcmp(arg[iarg+1],"device") == 0) {
+        forward_comm_classic = 0;
+        forward_comm_on_host = 0;
+      } else error->all(FLERR,"Illegal package command");
+      iarg += 2;
+    } else error->all(FLERR,"Illegal package command");   // unknown keyword
+  }
+}
+
+/* ----------------------------------------------------------------------
+   called by Finish
+------------------------------------------------------------------------- */
+
+int KokkosLMP::neigh_list_kokkos(int m)
+{
+  NeighborKokkos *nk = (NeighborKokkos *) neighbor;
+  const bool host_filled =     // host list m exists with non-empty d_numneigh
+    nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0();
+  const bool device_filled =   // same test for the device list
+    nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0();
+  return (host_filled || device_filled) ? 1 : 0;
+}
+
+/* ----------------------------------------------------------------------
+   called by Finish
+------------------------------------------------------------------------- */
+
+int KokkosLMP::neigh_count(int m)
+{
+  int inum = 0;      // stays 0 when list m exists on neither host nor device
+  int nneigh = 0;
+  // host-accessible copies of the list's ilist / numneigh arrays
+  ArrayTypes<LMPHostType>::t_int_1d h_ilist;
+  ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
+
+  NeighborKokkos *nk = (NeighborKokkos *) neighbor;
+  if (nk->lists_host[m]) {               // the host list takes precedence
+    inum = nk->lists_host[m]->inum;
+#ifndef KOKKOS_USE_UVM
+    h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
+    h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
+#else
+    h_ilist = nk->lists_host[m]->d_ilist;
+    h_numneigh = nk->lists_host[m]->d_numneigh;
+#endif
+    Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
+    Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
+  } else if (nk->lists_device[m]) {
+    inum = nk->lists_device[m]->inum;
+#ifndef KOKKOS_USE_UVM
+    h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
+    h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
+#else
+    h_ilist = nk->lists_device[m]->d_ilist;
+    h_numneigh = nk->lists_device[m]->d_numneigh;
+#endif
+    Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
+    Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
+  }
+  // sum the neighbor counts of the inum atoms named in ilist
+  for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
+
+  return nneigh;
+}
diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h
new file mode 100644
index 0000000000..512c76a489
--- /dev/null
+++ b/src/KOKKOS/kokkos.h
@@ -0,0 +1,40 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef KOKKOS_LMP_H
+#define KOKKOS_LMP_H
+
+#include "pointers.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class KokkosLMP : protected Pointers {
+ public:
+  int kokkos_exists;               // set to 1 by the constructor
+  int neighflag;                   // "neigh" keyword of accelerator(); default FULL
+  int exchange_comm_classic;       // 1 after "comm/exchange no"
+  int forward_comm_classic;        // 1 after "comm/forward no"
+  int exchange_comm_on_host;       // 1 for "comm/exchange host", 0 for "device"
+  int forward_comm_on_host;        // 1 for "comm/forward host", 0 for "device"
+
+  KokkosLMP(class LAMMPS *, int, char **);
+  ~KokkosLMP();
+  void accelerator(int, char **);  // parses the package kokkos keywords
+  int neigh_list_kokkos(int);      // 1 if list m has non-empty Kokkos data
+  int neigh_count(int);            // summed neighbor counts of Kokkos list m
+};
+
+}
+
+#endif
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
new file mode 100644
index 0000000000..4887b91b10
--- /dev/null
+++ b/src/KOKKOS/kokkos_type.h
@@ -0,0 +1,617 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_LMPTYPE_KOKKOS_H
+#define LMP_LMPTYPE_KOKKOS_H
+
+#include <Kokkos_View.hpp>
+#include <Kokkos_Macros.hpp>
+#include <Kokkos_Atomic.hpp>
+#include <Kokkos_DualView.hpp>
+#include <impl/Kokkos_Timer.hpp>
+#include <Kokkos_Vectorization.hpp>
+
+#define MAX_TYPES_STACKPARAMS 12
+#define NeighClusterSize 8
+// set LMPHostType and LMPDeviceType
+
+#ifndef DEVICE
+#define DEVICE 1
+#endif
+
+#if DEVICE==1
+  #ifdef KOKKOS_HAVE_OPENMP
+    #include "Kokkos_OpenMP.hpp"
+    typedef Kokkos::OpenMP LMPDeviceType;
+    typedef Kokkos::OpenMP LMPHostType;
+  #else
+    #include "Kokkos_Threads.hpp"
+    typedef Kokkos::Threads LMPDeviceType;
+    typedef Kokkos::Threads LMPHostType;
+  #endif
+  #ifndef __CUDACC__
+    struct double2 {
+      double x, y;
+    };
+    struct float2 {
+      float x, y;
+    };
+    struct double4 {
+      double x, y, z, w;
+    };
+    struct float4 {
+      float x, y, z, w;
+    };
+  #endif
+#else
+  #include "cuda.h"
+  #include "cuda_runtime.h"
+  #include "Kokkos_Cuda.hpp"
+  #include "Kokkos_Threads.hpp"
+  typedef Kokkos::Cuda LMPDeviceType;
+  typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
+#endif
+
+// set ExecutionSpace struct with variable "space"
+
+template<class Device>
+struct ExecutionSpaceFromDevice;   // primary template is only declared; just the specializations below are defined
+// exactly one host specialization exists per build: OpenMP if KOKKOS_HAVE_OPENMP is defined, otherwise Threads
+#ifdef KOKKOS_HAVE_OPENMP
+template<>
+struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
+  static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
+};
+#else
+template<>
+struct ExecutionSpaceFromDevice<Kokkos::Threads> {
+  static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
+};
+#endif  // KOKKOS_HAVE_OPENMP
+#if DEVICE==2   // the Cuda specialization is compiled only in DEVICE==2 builds
+template<>
+struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
+  static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
+};
+#endif
+
+// define precision
+// handle global precision, force, energy, positions, kspace separately
+
+#ifndef PRECISION
+#define PRECISION 2
+#endif
+#if PRECISION==1
+typedef float LMP_FLOAT;
+typedef float2 LMP_FLOAT2;
+typedef float4 LMP_FLOAT4;
+#else
+typedef double LMP_FLOAT;
+typedef double2 LMP_FLOAT2;
+typedef double4 LMP_FLOAT4;
+#endif
+
+#ifndef PREC_FORCE
+#define PREC_FORCE PRECISION
+#endif
+
+#if PREC_FORCE==1
+typedef float F_FLOAT;
+typedef float2 F_FLOAT2;
+typedef float4 F_FLOAT4;
+#else
+typedef double F_FLOAT;
+typedef double2 F_FLOAT2;
+typedef double4 F_FLOAT4;
+#endif
+
+#ifndef PREC_ENERGY
+#define PREC_ENERGY PRECISION
+#endif
+
+#if PREC_ENERGY==1
+typedef float E_FLOAT;
+typedef float2 E_FLOAT2;
+typedef float4 E_FLOAT4;
+#else
+typedef double E_FLOAT;
+typedef double2 E_FLOAT2;
+typedef double4 E_FLOAT4;
+#endif
+
+// Value-type accumulator: zero-initialised by its constructor and combined
+// member-wise with operator+=.
+struct s_EV_FLOAT {
+  // accumulated quantities: two energy terms plus a six-entry array
+  // (presumably the virial components -- TODO confirm against pair_kokkos.h)
+  E_FLOAT evdwl;
+  E_FLOAT ecoul;
+  E_FLOAT v[6];
+
+  // every accumulator starts at zero
+  KOKKOS_INLINE_FUNCTION
+  s_EV_FLOAT() : evdwl(0), ecoul(0) {
+    for (int k = 0; k < 6; k++) v[k] = 0;
+  }
+
+  // add each member of rhs to the matching member of this object;
+  // returns *this so the result can be used in a larger expression
+  KOKKOS_INLINE_FUNCTION
+  s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
+    for (int k = 0; k < 6; k++) v[k] += rhs.v[k];
+    ecoul += rhs.ecoul;
+    evdwl += rhs.evdwl;
+    return *this;
+  }
+};
+typedef struct s_EV_FLOAT EV_FLOAT;
+
+#ifndef PREC_POS
+#define PREC_POS PRECISION
+#endif
+
+#if PREC_POS==1
+typedef float X_FLOAT;
+typedef float2 X_FLOAT2;
+typedef float4 X_FLOAT4;
+#else
+typedef double X_FLOAT;
+typedef double2 X_FLOAT2;
+typedef double4 X_FLOAT4;
+#endif
+
+#ifndef PREC_VELOCITIES
+#define PREC_VELOCITIES PRECISION
+#endif
+
+#if PREC_VELOCITIES==1
+typedef float V_FLOAT;
+typedef float2 V_FLOAT2;
+typedef float4 V_FLOAT4;
+#else
+typedef double V_FLOAT;
+typedef double2 V_FLOAT2;
+typedef double4 V_FLOAT4;
+#endif
+
+#if PREC_KSPACE==1   // NOTE(review): unlike the PREC_* blocks above, no "#ifndef PREC_KSPACE" default to PRECISION precedes this test
+typedef float K_FLOAT;
+typedef float2 K_FLOAT2;
+typedef float4 K_FLOAT4;
+#else                // also taken when PREC_KSPACE is undefined (it then evaluates as 0), even if PRECISION==1
+typedef double K_FLOAT;
+typedef double2 K_FLOAT2;
+typedef double4 K_FLOAT4;
+#endif
+
+// ------------------------------------------------------------------------
+
+// LAMMPS types
+
+template <class DeviceType>
+struct ArrayTypes;
+
+template <>
+struct ArrayTypes<LMPDeviceType> {
+// naming: each tdual_X is a Kokkos::DualView; t_X, t_X_const, t_X_um, t_X_const_um (and t_X_randomread where declared) are that DualView's t_dev* member view types
+// scalar types
+
+typedef Kokkos::
+  DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
+typedef tdual_int_scalar::t_dev t_int_scalar;
+typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
+typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
+typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
+
+typedef Kokkos::
+  DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> 
+  tdual_float_scalar;
+typedef tdual_float_scalar::t_dev t_float_scalar;
+typedef tdual_float_scalar::t_dev_const t_float_scalar_const;
+typedef tdual_float_scalar::t_dev_um t_float_scalar_um;
+typedef tdual_float_scalar::t_dev_const_um t_float_scalar_const_um;
+
+// generic array types
+
+typedef Kokkos::
+  DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
+typedef tdual_int_1d::t_dev t_int_1d;
+typedef tdual_int_1d::t_dev_const t_int_1d_const;
+typedef tdual_int_1d::t_dev_um t_int_1d_um;
+typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
+typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
+
+typedef Kokkos::
+  DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
+typedef tdual_int_2d::t_dev t_int_2d;
+typedef tdual_int_2d::t_dev_const t_int_2d_const;
+typedef tdual_int_2d::t_dev_um t_int_2d_um;
+typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
+typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
+
+typedef Kokkos::
+  DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> 
+  tdual_tagint_1d;
+typedef tdual_tagint_1d::t_dev t_tagint_1d;
+typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
+typedef tdual_tagint_1d::t_dev_um t_tagint_1d_um;
+typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
+typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
+
+// 1d float array n
+
+typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
+typedef tdual_float_1d::t_dev t_float_1d;
+typedef tdual_float_1d::t_dev_const t_float_1d_const;
+typedef tdual_float_1d::t_dev_um t_float_1d_um;
+typedef tdual_float_1d::t_dev_const_um t_float_1d_const_um;
+typedef tdual_float_1d::t_dev_const_randomread t_float_1d_randomread;
+
+//2d float array n*m
+typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
+typedef tdual_float_2d::t_dev t_float_2d;
+typedef tdual_float_2d::t_dev_const t_float_2d_const;
+typedef tdual_float_2d::t_dev_um t_float_2d_um;
+typedef tdual_float_2d::t_dev_const_um t_float_2d_const_um;
+typedef tdual_float_2d::t_dev_const_randomread t_float_2d_randomread;
+
+//Position Types
+//1d X_FLOAT array n
+typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
+typedef tdual_xfloat_1d::t_dev t_xfloat_1d;
+typedef tdual_xfloat_1d::t_dev_const t_xfloat_1d_const;
+typedef tdual_xfloat_1d::t_dev_um t_xfloat_1d_um;
+typedef tdual_xfloat_1d::t_dev_const_um t_xfloat_1d_const_um;
+typedef tdual_xfloat_1d::t_dev_const_randomread t_xfloat_1d_randomread;
+
+//2d X_FLOAT array n*m
+typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
+typedef tdual_xfloat_2d::t_dev t_xfloat_2d;
+typedef tdual_xfloat_2d::t_dev_const t_xfloat_2d_const;
+typedef tdual_xfloat_2d::t_dev_um t_xfloat_2d_um;
+typedef tdual_xfloat_2d::t_dev_const_um t_xfloat_2d_const_um;
+typedef tdual_xfloat_2d::t_dev_const_randomread t_xfloat_2d_randomread;
+
+//2d X_FLOAT array n*3 (LayoutLeft when LMP_KOKKOS_NO_LEGACY is defined, LayoutRight otherwise)
+#ifdef LMP_KOKKOS_NO_LEGACY
+typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutLeft, LMPDeviceType> tdual_x_array;
+#else
+typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
+#endif
+typedef tdual_x_array::t_dev t_x_array;
+typedef tdual_x_array::t_dev_const t_x_array_const;
+typedef tdual_x_array::t_dev_um t_x_array_um;
+typedef tdual_x_array::t_dev_const_um t_x_array_const_um;
+typedef tdual_x_array::t_dev_const_randomread t_x_array_randomread;
+
+//Velocity Types
+//1d V_FLOAT array n
+typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
+typedef tdual_vfloat_1d::t_dev t_vfloat_1d;
+typedef tdual_vfloat_1d::t_dev_const t_vfloat_1d_const;
+typedef tdual_vfloat_1d::t_dev_um t_vfloat_1d_um;
+typedef tdual_vfloat_1d::t_dev_const_um t_vfloat_1d_const_um;
+typedef tdual_vfloat_1d::t_dev_const_randomread t_vfloat_1d_randomread;
+
+//2d V_FLOAT array n*m
+typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
+typedef tdual_vfloat_2d::t_dev t_vfloat_2d;
+typedef tdual_vfloat_2d::t_dev_const t_vfloat_2d_const;
+typedef tdual_vfloat_2d::t_dev_um t_vfloat_2d_um;
+typedef tdual_vfloat_2d::t_dev_const_um t_vfloat_2d_const_um;
+typedef tdual_vfloat_2d::t_dev_const_randomread t_vfloat_2d_randomread;
+
+//2d V_FLOAT array n*3
+typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
+//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
+typedef tdual_v_array::t_dev t_v_array;
+typedef tdual_v_array::t_dev_const t_v_array_const;
+typedef tdual_v_array::t_dev_um t_v_array_um;
+typedef tdual_v_array::t_dev_const_um t_v_array_const_um;
+typedef tdual_v_array::t_dev_const_randomread t_v_array_randomread;
+
+//Force Types
+//1d F_FLOAT array n
+
+typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
+typedef tdual_ffloat_1d::t_dev t_ffloat_1d;
+typedef tdual_ffloat_1d::t_dev_const t_ffloat_1d_const;
+typedef tdual_ffloat_1d::t_dev_um t_ffloat_1d_um;
+typedef tdual_ffloat_1d::t_dev_const_um t_ffloat_1d_const_um;
+typedef tdual_ffloat_1d::t_dev_const_randomread t_ffloat_1d_randomread;
+
+//2d F_FLOAT array n*m
+
+typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
+typedef tdual_ffloat_2d::t_dev t_ffloat_2d;
+typedef tdual_ffloat_2d::t_dev_const t_ffloat_2d_const;
+typedef tdual_ffloat_2d::t_dev_um t_ffloat_2d_um;
+typedef tdual_ffloat_2d::t_dev_const_um t_ffloat_2d_const_um;
+typedef tdual_ffloat_2d::t_dev_const_randomread t_ffloat_2d_randomread;
+
+//2d F_FLOAT array n*3
+
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
+//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
+typedef tdual_f_array::t_dev t_f_array;
+typedef tdual_f_array::t_dev_const t_f_array_const;
+typedef tdual_f_array::t_dev_um t_f_array_um;
+typedef tdual_f_array::t_dev_const_um t_f_array_const_um;
+typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread;
+
+//2d F_FLOAT array n*6 (for virial)
+
+typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
+typedef tdual_virial_array::t_dev t_virial_array;
+typedef tdual_virial_array::t_dev_const t_virial_array_const;
+typedef tdual_virial_array::t_dev_um t_virial_array_um;
+typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
+typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
+
+//Energy Types
+//1d E_FLOAT array n
+
+typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
+typedef tdual_efloat_1d::t_dev t_efloat_1d;
+typedef tdual_efloat_1d::t_dev_const t_efloat_1d_const;
+typedef tdual_efloat_1d::t_dev_um t_efloat_1d_um;
+typedef tdual_efloat_1d::t_dev_const_um t_efloat_1d_const_um;
+typedef tdual_efloat_1d::t_dev_const_randomread t_efloat_1d_randomread;
+
+//2d E_FLOAT array n*m
+
+typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
+typedef tdual_efloat_2d::t_dev t_efloat_2d;
+typedef tdual_efloat_2d::t_dev_const t_efloat_2d_const;
+typedef tdual_efloat_2d::t_dev_um t_efloat_2d_um;
+typedef tdual_efloat_2d::t_dev_const_um t_efloat_2d_const_um;
+typedef tdual_efloat_2d::t_dev_const_randomread t_efloat_2d_randomread;
+
+//2d E_FLOAT array n*3
+
+typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
+typedef tdual_e_array::t_dev t_e_array;
+typedef tdual_e_array::t_dev_const t_e_array_const;
+typedef tdual_e_array::t_dev_um t_e_array_um;
+typedef tdual_e_array::t_dev_const_um t_e_array_const_um;
+typedef tdual_e_array::t_dev_const_randomread t_e_array_randomread;
+
+//Neighbor Types
+
+typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
+typedef tdual_neighbors_2d::t_dev t_neighbors_2d;
+typedef tdual_neighbors_2d::t_dev_const t_neighbors_2d_const;
+typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
+typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
+typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
+
+};
+
+#if DEVICE==2
+template <>
+struct ArrayTypes<LMPHostType> {
+// host-side counterpart of ArrayTypes<LMPDeviceType>: tdual_X name the same DualViews, while t_X and its variants are the DualView's t_host* member view types
+//Scalar Types
+
+typedef Kokkos::DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
+typedef tdual_int_scalar::t_host t_int_scalar;
+typedef tdual_int_scalar::t_host_const t_int_scalar_const;
+typedef tdual_int_scalar::t_host_um t_int_scalar_um;
+typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
+
+typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
+typedef tdual_float_scalar::t_host t_float_scalar;
+typedef tdual_float_scalar::t_host_const t_float_scalar_const;
+typedef tdual_float_scalar::t_host_um t_float_scalar_um;
+typedef tdual_float_scalar::t_host_const_um t_float_scalar_const_um;
+
+//Generic ArrayTypes
+typedef Kokkos::DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
+typedef tdual_int_1d::t_host t_int_1d;
+typedef tdual_int_1d::t_host_const t_int_1d_const;
+typedef tdual_int_1d::t_host_um t_int_1d_um;
+typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
+typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
+
+typedef Kokkos::DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
+typedef tdual_int_2d::t_host t_int_2d;
+typedef tdual_int_2d::t_host_const t_int_2d_const;
+typedef tdual_int_2d::t_host_um t_int_2d_um;
+typedef tdual_int_2d::t_host_const_um t_int_2d_const_um;
+typedef tdual_int_2d::t_host_const_randomread t_int_2d_randomread;
+
+typedef Kokkos::DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d;
+typedef tdual_tagint_1d::t_host t_tagint_1d;
+typedef tdual_tagint_1d::t_host_const t_tagint_1d_const;
+typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
+typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
+typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
+
+//1d float array n
+typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
+typedef tdual_float_1d::t_host t_float_1d;
+typedef tdual_float_1d::t_host_const t_float_1d_const;
+typedef tdual_float_1d::t_host_um t_float_1d_um;
+typedef tdual_float_1d::t_host_const_um t_float_1d_const_um;
+typedef tdual_float_1d::t_host_const_randomread t_float_1d_randomread;
+
+//2d float array n*m
+typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
+typedef tdual_float_2d::t_host t_float_2d;
+typedef tdual_float_2d::t_host_const t_float_2d_const;
+typedef tdual_float_2d::t_host_um t_float_2d_um;
+typedef tdual_float_2d::t_host_const_um t_float_2d_const_um;
+typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread;
+
+//Position Types
+//1d X_FLOAT array n
+typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
+typedef tdual_xfloat_1d::t_host t_xfloat_1d;
+typedef tdual_xfloat_1d::t_host_const t_xfloat_1d_const;
+typedef tdual_xfloat_1d::t_host_um t_xfloat_1d_um;
+typedef tdual_xfloat_1d::t_host_const_um t_xfloat_1d_const_um;
+typedef tdual_xfloat_1d::t_host_const_randomread t_xfloat_1d_randomread;
+
+//2d X_FLOAT array n*m
+typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
+typedef tdual_xfloat_2d::t_host t_xfloat_2d;
+typedef tdual_xfloat_2d::t_host_const t_xfloat_2d_const;
+typedef tdual_xfloat_2d::t_host_um t_xfloat_2d_um;
+typedef tdual_xfloat_2d::t_host_const_um t_xfloat_2d_const_um;
+typedef tdual_xfloat_2d::t_host_const_randomread t_xfloat_2d_randomread;
+
+//2d X_FLOAT array n*3; aliases the device-side DualView so both structs agree on its layout when LMP_KOKKOS_NO_LEGACY selects LayoutLeft
+typedef ArrayTypes<LMPDeviceType>::tdual_x_array tdual_x_array;
+typedef tdual_x_array::t_host t_x_array;
+typedef tdual_x_array::t_host_const t_x_array_const;
+typedef tdual_x_array::t_host_um t_x_array_um;
+typedef tdual_x_array::t_host_const_um t_x_array_const_um;
+typedef tdual_x_array::t_host_const_randomread t_x_array_randomread;
+
+//Velocity Types
+//1d V_FLOAT array n
+typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
+typedef tdual_vfloat_1d::t_host t_vfloat_1d;
+typedef tdual_vfloat_1d::t_host_const t_vfloat_1d_const;
+typedef tdual_vfloat_1d::t_host_um t_vfloat_1d_um;
+typedef tdual_vfloat_1d::t_host_const_um t_vfloat_1d_const_um;
+typedef tdual_vfloat_1d::t_host_const_randomread t_vfloat_1d_randomread;
+
+//2d V_FLOAT array n*m
+typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
+typedef tdual_vfloat_2d::t_host t_vfloat_2d;
+typedef tdual_vfloat_2d::t_host_const t_vfloat_2d_const;
+typedef tdual_vfloat_2d::t_host_um t_vfloat_2d_um;
+typedef tdual_vfloat_2d::t_host_const_um t_vfloat_2d_const_um;
+typedef tdual_vfloat_2d::t_host_const_randomread t_vfloat_2d_randomread;
+
+//2d V_FLOAT array n*3
+typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
+//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
+typedef tdual_v_array::t_host t_v_array;
+typedef tdual_v_array::t_host_const t_v_array_const;
+typedef tdual_v_array::t_host_um t_v_array_um;
+typedef tdual_v_array::t_host_const_um t_v_array_const_um;
+typedef tdual_v_array::t_host_const_randomread t_v_array_randomread;
+
+//Force Types
+//1d F_FLOAT array n
+typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
+typedef tdual_ffloat_1d::t_host t_ffloat_1d;
+typedef tdual_ffloat_1d::t_host_const t_ffloat_1d_const;
+typedef tdual_ffloat_1d::t_host_um t_ffloat_1d_um;
+typedef tdual_ffloat_1d::t_host_const_um t_ffloat_1d_const_um;
+typedef tdual_ffloat_1d::t_host_const_randomread t_ffloat_1d_randomread;
+
+//2d F_FLOAT array n*m
+typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
+typedef tdual_ffloat_2d::t_host t_ffloat_2d;
+typedef tdual_ffloat_2d::t_host_const t_ffloat_2d_const;
+typedef tdual_ffloat_2d::t_host_um t_ffloat_2d_um;
+typedef tdual_ffloat_2d::t_host_const_um t_ffloat_2d_const_um;
+typedef tdual_ffloat_2d::t_host_const_randomread t_ffloat_2d_randomread;
+
+//2d F_FLOAT array n*3
+typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
+//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
+typedef tdual_f_array::t_host t_f_array;
+typedef tdual_f_array::t_host_const t_f_array_const;
+typedef tdual_f_array::t_host_um t_f_array_um;
+typedef tdual_f_array::t_host_const_um t_f_array_const_um;
+typedef tdual_f_array::t_host_const_randomread t_f_array_randomread;
+
+//2d F_FLOAT array n*6 (for virial)
+typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
+typedef tdual_virial_array::t_host t_virial_array;
+typedef tdual_virial_array::t_host_const t_virial_array_const;
+typedef tdual_virial_array::t_host_um t_virial_array_um;
+typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
+typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
+
+
+
+//Energy Types
+//1d E_FLOAT array n
+typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
+typedef tdual_efloat_1d::t_host t_efloat_1d;
+typedef tdual_efloat_1d::t_host_const t_efloat_1d_const;
+typedef tdual_efloat_1d::t_host_um t_efloat_1d_um;
+typedef tdual_efloat_1d::t_host_const_um t_efloat_1d_const_um;
+typedef tdual_efloat_1d::t_host_const_randomread t_efloat_1d_randomread;
+
+//2d E_FLOAT array n*m
+typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
+typedef tdual_efloat_2d::t_host t_efloat_2d;
+typedef tdual_efloat_2d::t_host_const t_efloat_2d_const;
+typedef tdual_efloat_2d::t_host_um t_efloat_2d_um;
+typedef tdual_efloat_2d::t_host_const_um t_efloat_2d_const_um;
+typedef tdual_efloat_2d::t_host_const_randomread t_efloat_2d_randomread;
+
+//2d E_FLOAT array n*3
+typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
+typedef tdual_e_array::t_host t_e_array;
+typedef tdual_e_array::t_host_const t_e_array_const;
+typedef tdual_e_array::t_host_um t_e_array_um;
+typedef tdual_e_array::t_host_const_um t_e_array_const_um;
+typedef tdual_e_array::t_host_const_randomread t_e_array_randomread;
+
+//Neighbor Types
+typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
+typedef tdual_neighbors_2d::t_host t_neighbors_2d;
+typedef tdual_neighbors_2d::t_host_const t_neighbors_2d_const;
+typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
+typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
+typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
+
+};
+#endif
+//default LAMMPS Types
+typedef struct ArrayTypes<LMPDeviceType> DAT;
+typedef struct ArrayTypes<LMPHostType> HAT;
+
+template<class DeviceType, class BufferView, class DualView>   // DeviceType appears in no parameter, so callers must name it explicitly
+void buffer_view(BufferView &buf, DualView &view,
+                 const size_t n0,
+                 const size_t n1 = 0,
+                 const size_t n2 = 0,
+                 const size_t n3 = 0,
+                 const size_t n4 = 0,
+                 const size_t n5 = 0,
+                 const size_t n6 = 0,
+                 const size_t n7 = 0) {
+  // rebind buf to a BufferView constructed from the raw pointer of view's DeviceType-side data and the extents n0..n7
+  buf = BufferView(
+          view.template view<DeviceType>().ptr_on_device(),
+          n0,n1,n2,n3,n4,n5,n6,n7);
+
+}
+
+// Functor storing a zero int at word i of the buffer at ptr.
+template<class DeviceType>
+struct MemsetZeroFunctor {
+  typedef DeviceType  device_type ;
+  void* ptr;
+  KOKKOS_INLINE_FUNCTION void operator()(const int i) const { ((int*)ptr)[i] = 0; }
+};
+
+// Zero a view's storage in int-sized words, then fence its device type.
+// The word count divides by sizeof(int), matching the functor's int stores; any tail bytes stay untouched.
+template<class ViewType>
+void memset_kokkos (ViewType &view) {
+  static MemsetZeroFunctor<typename ViewType::device_type> f;
+  f.ptr = view.ptr_on_device();
+  Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/sizeof(int), f);
+  ViewType::device_type::fence();
+}
+
+
+#endif
diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h
new file mode 100644
index 0000000000..2651c5e5c0
--- /dev/null
+++ b/src/KOKKOS/memory_kokkos.h
@@ -0,0 +1,208 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Kokkos versions of create/grow/destroy multi-dimensional arrays
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   create a 1d array
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, typename TYPE::value_type *&array, 
+                   int n1, const char *name)
+{
+  data = TYPE(name,n1);                    // replace data with a new TYPE labelled name, extent n1
+  array = data.h_view.ptr_on_device();     // array receives the raw pointer of data's h_view
+  return data;                             // the new container is also returned by value
+}
+
+template <typename TYPE, typename HTYPE>
+TYPE create_kokkos(TYPE &data, HTYPE &h_data,
+                   typename TYPE::value_type *&array, int n1,
+                   const char *name)
+{
+  data = TYPE(std::string(name),n1);
+#ifdef KOKKOS_USE_UVM
+  h_data = data;                                // UVM build: h_data is assigned data itself
+#else
+  h_data = Kokkos::create_mirror_view(data);    // otherwise h_data is a mirror view made from data
+#endif
+  array = h_data.ptr_on_device();               // raw pointer comes from h_data, not from data
+  return data;
+}
+
+
+template <typename TYPE, typename HTYPE>
+TYPE create_kokkos(TYPE &data, HTYPE &h_data,
+                   int n1, const char *name)
+{
+  data = TYPE(std::string(name),n1);            // new 1d container labelled name
+#ifdef KOKKOS_USE_UVM
+  h_data = data;                                // UVM build: h_data is assigned data itself
+#else
+  h_data = Kokkos::create_mirror_view(data);    // otherwise h_data is a mirror view made from data
+#endif
+  return data;
+}
+
+/* ----------------------------------------------------------------------
+   grow or shrink 1st dim of a 1d array
+   last dim must stay the same
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type *&array,
+                 int n1, const char *name)
+{
+  if (array) {
+    data.resize(n1);                         // existing array: resize to n1 entries
+    array = data.h_view.ptr_on_device();     // and re-read the raw pointer from h_view
+  } else return create_kokkos(data,array,n1,name);   // NULL array: create it instead
+  return data;
+}
+
+template <typename TYPE>
+void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
+{
+  if (!array) return;   // NULL array: nothing to do
+  array = NULL;
+  data = TYPE();        // NOTE(review): data is passed by value, so this resets only the local copy -- confirm that is intended
+}
+
+/* ----------------------------------------------------------------------
+   create a 2d array
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, const char *name)
+{
+  data = TYPE(name,n1,n2);   // replace data with a new n1 x n2 TYPE labelled name
+  return data;               // and hand the same container back by value
+}
+
+template <typename TYPE, typename HTYPE>
+TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2,
+                   const char *name)
+{
+  data = TYPE(std::string(name),n1,n2);         // new n1 x n2 container labelled name
+#ifdef KOKKOS_USE_UVM
+  h_data = data;                                // UVM build: h_data is assigned data itself
+#else
+  h_data = Kokkos::create_mirror_view(data);    // otherwise h_data is a mirror view made from data
+#endif
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                   int n1, int n2, const char *name)
+{
+  // allocate the n1 x n2 container
+  data = TYPE(std::string(name),n1,n2);
+
+  // separate table of n1 row pointers, obtained from smalloc()
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) smalloc(nbytes,name);
+
+  // array[i] addresses element (i,0) of data's h_view
+  for (int i = 0; i < n1; i++) array[i] = &data.h_view(i,0);
+  return data;
+}
+
+// 2d create that also fills h_data and a row-pointer array into h_data
+template <typename TYPE, typename HTYPE>
+  TYPE create_kokkos(TYPE &data, HTYPE &h_data,
+                     typename TYPE::value_type **&array, int n1, int n2,
+                     const char *name)
+{
+  // n1 x n2 container; h_data is a mirror view of it unless KOKKOS_USE_UVM is set
+  data = TYPE(std::string(name),n1,n2);
+#ifndef KOKKOS_USE_UVM
+  h_data = Kokkos::create_mirror_view(data);
+#else
+  h_data = data;
+#endif
+
+  // table of n1 row pointers from smalloc(); entry i addresses h_data(i,0)
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) smalloc(nbytes,name);
+  for (int i = 0; i < n1; i++) array[i] = &h_data(i,0);
+
+  return data;
+}
+
+/* ----------------------------------------------------------------------
+   grow or shrink 1st dim of a 2d array
+   last dim must stay the same
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                 int n1, int n2, const char *name)
+{
+  // NULL array: nothing to resize, so create everything instead
+  if (!array) return create_kokkos(data,array,n1,n2,name);
+
+  // resize the container, then the row-pointer table, then recompute each row pointer
+  data.resize(n1,n2);
+  const bigint table_bytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type**) srealloc(array,table_bytes,name);
+  for (int row = 0; row < n1; row++) array[row] = &data.h_view(row,0);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                   int n1, const char *name)
+{
+  // only the leading extent is passed to TYPE's constructor here
+  data = TYPE(std::string(name),n1);
+
+  // row-pointer table from smalloc(): one pointer per leading index
+  const bigint table_bytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) smalloc(table_bytes,name);
+  for (int row = 0; row < n1; row++) array[row] = &data.h_view(row,0);
+  return data;
+}
+
+template <typename TYPE>
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                 int n1, const char *name)
+{
+  // nothing allocated yet: defer to the matching create_kokkos() overload
+  if (array == NULL) return create_kokkos(data,array,n1,name);
+
+  data.resize(n1);
+  // resize the existing row-pointer table with srealloc(), as the 2d grow does;
+  // a fresh smalloc() here would drop the old table without freeing it
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) srealloc(array,nbytes,name);
+
+  for (int i = 0; i < n1; i++) array[i] = &data.h_view(i,0);
+  return data;
+}
+
+/* ----------------------------------------------------------------------
+   destroy a 2d array
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
+{
+  if (!array) return;   // NULL array: nothing to do
+  sfree(array);         // release the row-pointer table
+  array = NULL;
+  data = TYPE();        // NOTE(review): data is passed by value, so this resets only the local copy -- confirm that is intended
+}
diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp
new file mode 100644
index 0000000000..4fcd136156
--- /dev/null
+++ b/src/KOKKOS/modify_kokkos.cpp
@@ -0,0 +1,585 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "modify_kokkos.h"
+#include "atom_kokkos.h"
+#include "update.h"
+#include "fix.h"
+#include "compute.h"
+
+using namespace LAMMPS_NS;
+
+#define BIG 1.0e20
+
+/* ---------------------------------------------------------------------- */
+
+// atomKK holds the atom pointer cast to AtomKokkos
+ModifyKokkos::ModifyKokkos(LAMMPS *lmp) :
+  Modify(lmp), atomKK((AtomKokkos *) atom)
+{}
+
+/* ----------------------------------------------------------------------
+   setup for run, calls setup() of all fixes and computes
+   called from Verlet, RESPA, Min
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::setup(int vflag)
+{
+  // computes are set up ahead of fixes
+  // b/c NH fixes need to use the DOF of temperature computes
+  for (int icompute = 0; icompute < ncompute; icompute++) compute[icompute]->setup();
+
+  if (update->whichflag == 1) {
+    for (int ifix = 0; ifix < nfix; ifix++) {
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->setup(vflag);
+    }
+  } else if (update->whichflag == 2) {
+    for (int ifix = 0; ifix < nfix; ifix++) {
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->min_setup(vflag);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup pre_exchange call, only for fixes that define pre_exchange
+   called from Verlet, RESPA, Min, and WriteRestart with whichflag = 0
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::setup_pre_exchange()
+{
+  // whichflag <= 1 -> pre_exchange list, whichflag == 2 -> min_pre_exchange list
+  if (update->whichflag <= 1) {
+    for (int m = 0; m < n_pre_exchange; m++) {
+      const int ifix = list_pre_exchange[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->setup_pre_exchange();
+    }
+  } else if (update->whichflag == 2) {
+    for (int m = 0; m < n_min_pre_exchange; m++) {
+      const int ifix = list_min_pre_exchange[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->min_setup_pre_exchange();
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup pre_neighbor call, only for fixes that define pre_neighbor
+   called from Verlet, RESPA
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::setup_pre_neighbor()
+{
+  // whichflag == 1 -> pre_neighbor list, whichflag == 2 -> min_pre_neighbor list
+  if (update->whichflag == 1) {
+    for (int m = 0; m < n_pre_neighbor; m++) {
+      const int ifix = list_pre_neighbor[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->setup_pre_neighbor();
+    }
+  } else if (update->whichflag == 2) {
+    for (int m = 0; m < n_min_pre_neighbor; m++) {
+      const int ifix = list_min_pre_neighbor[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->min_setup_pre_neighbor();
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup pre_force call, only for fixes that define pre_force
+   called from Verlet, RESPA, Min
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::setup_pre_force(int vflag)
+{
+  // whichflag == 1 -> pre_force list, whichflag == 2 -> min_pre_force list
+  if (update->whichflag == 1) {
+    for (int m = 0; m < n_pre_force; m++) {
+      const int ifix = list_pre_force[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->setup_pre_force(vflag);
+    }
+  } else if (update->whichflag == 2) {
+    for (int m = 0; m < n_min_pre_force; m++) {
+      const int ifix = list_min_pre_force[m];
+      atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+      atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+      fix[ifix]->min_setup_pre_force(vflag);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   1st half of integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::initial_integrate(int vflag)
+{
+  // per fix: atomKK sync()/modified() with its masks, then initial_integrate()
+  for (int m = 0; m < n_initial_integrate; m++) {
+    const int ifix = list_initial_integrate[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->initial_integrate(vflag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   post_integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::post_integrate()
+{
+  // per fix: atomKK sync()/modified() with its masks, then post_integrate()
+  for (int m = 0; m < n_post_integrate; m++) {
+    const int ifix = list_post_integrate[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->post_integrate();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre_exchange call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::pre_exchange()
+{
+  // per fix: atomKK sync()/modified() with its masks, then pre_exchange()
+  for (int m = 0; m < n_pre_exchange; m++) {
+    const int ifix = list_pre_exchange[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->pre_exchange();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre_neighbor call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::pre_neighbor()
+{
+  // per fix: atomKK sync()/modified() with its masks, then pre_neighbor()
+  for (int m = 0; m < n_pre_neighbor; m++) {
+    const int ifix = list_pre_neighbor[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->pre_neighbor();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre_force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::pre_force(int vflag)
+{
+  // per fix: atomKK sync()/modified() with its masks, then pre_force()
+  for (int m = 0; m < n_pre_force; m++) {
+    const int ifix = list_pre_force[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->pre_force(vflag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   post_force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::post_force(int vflag)
+{
+  // per fix: atomKK sync()/modified() with its masks, then post_force()
+  for (int m = 0; m < n_post_force; m++) {
+    const int ifix = list_post_force[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->post_force(vflag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   2nd half of integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::final_integrate()
+{
+  // per fix: atomKK sync()/modified() with its masks, then final_integrate()
+  for (int m = 0; m < n_final_integrate; m++) {
+    const int ifix = list_final_integrate[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->final_integrate();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   end-of-timestep call, only for relevant fixes
+   only call fix->end_of_step() on timesteps that are multiples of nevery
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::end_of_step()
+{
+  for (int m = 0; m < n_end_of_step; m++) {
+    // skip entries whose end_of_step_every[m] does not divide ntimestep
+    if (update->ntimestep % end_of_step_every[m] != 0) continue;
+    const int ifix = list_end_of_step[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->end_of_step();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   thermo energy call, only for relevant fixes
+   called by Thermo class
+   compute_scalar() is fix call to return energy
+------------------------------------------------------------------------- */
+
+double ModifyKokkos::thermo_energy()
+{
+  // accumulate compute_scalar() over the fixes in list_thermo_energy
+  double esum = 0.0;
+  for (int m = 0; m < n_thermo_energy; m++) {
+    const int ifix = list_thermo_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    esum += fix[ifix]->compute_scalar();
+  }
+  return esum;
+}
+
+/* ----------------------------------------------------------------------
+   post_run call
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::post_run()
+{
+  // every fix gets a post_run() call, preceded by atomKK sync()/modified()
+  // issued with that fix's execution_space and datamasks
+  for (int ifix = 0; ifix < nfix; ifix++) {
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->post_run();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup rRESPA pre_force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::setup_pre_force_respa(int vflag, int ilevel)
+{
+  // NOTE(review): uses list_pre_force, not list_pre_force_respa -- confirm
+  for (int m = 0; m < n_pre_force; m++) {
+    const int ifix = list_pre_force[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->setup_pre_force_respa(vflag,ilevel);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   1st half of rRESPA integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::initial_integrate_respa(int vflag, int ilevel, int iloop)
+{
+  // per fix: atomKK sync()/modified() with its masks,
+  // then initial_integrate_respa() with the same arguments
+  for (int m = 0; m < n_initial_integrate_respa; m++) {
+    const int ifix = list_initial_integrate_respa[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->initial_integrate_respa(vflag,ilevel,iloop);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   rRESPA post_integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::post_integrate_respa(int ilevel, int iloop)
+{
+  // per fix: atomKK sync()/modified(), then post_integrate_respa()
+  for (int m = 0; m < n_post_integrate_respa; m++) {
+    const int ifix = list_post_integrate_respa[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->post_integrate_respa(ilevel,iloop);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   rRESPA pre_force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::pre_force_respa(int vflag, int ilevel, int iloop)
+{
+  // per fix: atomKK sync()/modified(), then pre_force_respa()
+  for (int m = 0; m < n_pre_force_respa; m++) {
+    const int ifix = list_pre_force_respa[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->pre_force_respa(vflag,ilevel,iloop);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   rRESPA post_force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::post_force_respa(int vflag, int ilevel, int iloop)
+{
+  // per fix: atomKK sync()/modified(), then post_force_respa()
+  for (int m = 0; m < n_post_force_respa; m++) {
+    const int ifix = list_post_force_respa[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->post_force_respa(vflag,ilevel,iloop);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   2nd half of rRESPA integrate call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::final_integrate_respa(int ilevel, int iloop)
+{
+  // per fix: atomKK sync()/modified(), then final_integrate_respa()
+  for (int m = 0; m < n_final_integrate_respa; m++) {
+    const int ifix = list_final_integrate_respa[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->final_integrate_respa(ilevel,iloop);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   minimizer pre-exchange call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_pre_exchange()
+{
+  // per fix: atomKK sync()/modified(), then min_pre_exchange()
+  for (int m = 0; m < n_min_pre_exchange; m++) {
+    const int ifix = list_min_pre_exchange[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_pre_exchange();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   minimizer pre-neighbor call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_pre_neighbor()
+{
+  // per fix: atomKK sync()/modified(), then min_pre_neighbor()
+  for (int m = 0; m < n_min_pre_neighbor; m++) {
+    const int ifix = list_min_pre_neighbor[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_pre_neighbor();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   minimizer pre-force call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_pre_force(int vflag)
+{
+  // per fix: atomKK sync()/modified(), then min_pre_force()
+  for (int m = 0; m < n_min_pre_force; m++) {
+    const int ifix = list_min_pre_force[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_pre_force(vflag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   minimizer force adjustment call, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_post_force(int vflag)
+{
+  // per fix: atomKK sync()/modified(), then min_post_force()
+  for (int m = 0; m < n_min_post_force; m++) {
+    const int ifix = list_min_post_force[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_post_force(vflag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   minimizer energy/force evaluation, only for relevant fixes
+   return energy and forces on extra degrees of freedom
+------------------------------------------------------------------------- */
+
+double ModifyKokkos::min_energy(double *fextra)
+{
+  // fcursor walks fextra: each fix is handed the current position,
+  // then the cursor advances by that fix's min_dof()
+  double *fcursor = fextra;
+  double total = 0.0;
+  for (int m = 0; m < n_min_energy; m++) {
+    const int k = list_min_energy[m];
+    atomKK->sync(fix[k]->execution_space,fix[k]->datamask_read);
+    atomKK->modified(fix[k]->execution_space,fix[k]->datamask_modify);
+    total += fix[k]->min_energy(fcursor);
+    fcursor += fix[k]->min_dof();
+  }
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   store current state of extra dof, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_store()
+{
+  // per fix in list_min_energy: atomKK sync()/modified(), then min_store()
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_store();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   manage state of extra dof on a stack, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_clearstore()
+{
+  // per fix in list_min_energy: atomKK sync()/modified(), then min_clearstore()
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_clearstore();
+  }
+}
+
+void ModifyKokkos::min_pushstore()
+{
+  // per fix in list_min_energy: atomKK sync()/modified(), then min_pushstore()
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_pushstore();
+  }
+}
+
+void ModifyKokkos::min_popstore()
+{
+  // per fix in list_min_energy: atomKK sync()/modified(), then min_popstore()
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    fix[ifix]->min_popstore();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   displace extra dof along vector hextra, only for relevant fixes
+------------------------------------------------------------------------- */
+
+void ModifyKokkos::min_step(double alpha, double *hextra)
+{
+  // offset marks where the current fix's entries start in hextra;
+  // it advances by that fix's min_dof() after each min_step() call
+  int offset = 0;
+  for (int m = 0; m < n_min_energy; m++) {
+    const int k = list_min_energy[m];
+    atomKK->sync(fix[k]->execution_space,fix[k]->datamask_read);
+    atomKK->modified(fix[k]->execution_space,fix[k]->datamask_modify);
+    fix[k]->min_step(alpha,hextra+offset);
+    offset += fix[k]->min_dof();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute max allowed step size along vector hextra, only for relevant fixes
+------------------------------------------------------------------------- */
+
+double ModifyKokkos::max_alpha(double *hextra)
+{
+  // start from BIG and keep the smallest value any fix returns;
+  // offset steps through hextra by each fix's min_dof()
+  int offset = 0;
+  double alpha_min = BIG;
+  for (int m = 0; m < n_min_energy; m++) {
+    const int k = list_min_energy[m];
+    atomKK->sync(fix[k]->execution_space,fix[k]->datamask_read);
+    atomKK->modified(fix[k]->execution_space,fix[k]->datamask_modify);
+    const double alpha_fix = fix[k]->max_alpha(hextra+offset);
+    alpha_min = MIN(alpha_min,alpha_fix);
+    offset += fix[k]->min_dof();
+  }
+  return alpha_min;
+}
+
+/* ----------------------------------------------------------------------
+   extract extra dof for minimization, only for relevant fixes
+------------------------------------------------------------------------- */
+
+int ModifyKokkos::min_dof()
+{
+  // sum of min_dof() over all fixes in list_min_energy
+  int total = 0;
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    total += fix[ifix]->min_dof();
+  }
+  return total;
+}
+
+/* ----------------------------------------------------------------------
+   reset reference state of fix, only for relevant fixes
+------------------------------------------------------------------------- */
+
+int ModifyKokkos::min_reset_ref()
+{
+  // result is 1 if any fix's min_reset_ref() returned nonzero, else 0;
+  // every fix in list_min_energy is still called (no early exit)
+  int any_reset = 0;
+
+  for (int m = 0; m < n_min_energy; m++) {
+    const int ifix = list_min_energy[m];
+    atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
+    atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
+    if (fix[ifix]->min_reset_ref()) any_reset = 1;
+  }
+  return any_reset;
+}
diff --git a/src/KOKKOS/modify_kokkos.h b/src/KOKKOS/modify_kokkos.h
new file mode 100644
index 0000000000..c0c3a8d680
--- /dev/null
+++ b/src/KOKKOS/modify_kokkos.h
@@ -0,0 +1,73 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_MODIFY_KOKKOS_H
+#define LMP_MODIFY_KOKKOS_H
+
+#include "modify.h"
+
+namespace LAMMPS_NS {
+
+class ModifyKokkos : public Modify {   // Modify subclass using AtomKokkos
+ public:
+  ModifyKokkos(class LAMMPS *);
+  ~ModifyKokkos() {}
+  void setup(int);                     // setup and per-timestep hooks
+  void setup_pre_exchange();
+  void setup_pre_neighbor();
+  void setup_pre_force(int);
+  void initial_integrate(int);
+  void post_integrate();
+  void pre_decide();  // NOTE(review): not defined in modify_kokkos.cpp - confirm
+  void pre_exchange();
+  void pre_neighbor();
+  void pre_force(int);
+  void post_force(int);
+  void final_integrate();
+  void end_of_step();
+  double thermo_energy();
+  void post_run();
+
+  void setup_pre_force_respa(int, int);          // rRESPA hooks
+  void initial_integrate_respa(int, int, int);
+  void post_integrate_respa(int, int);
+  void pre_force_respa(int, int, int);
+  void post_force_respa(int, int, int);
+  void final_integrate_respa(int, int);
+
+  void min_pre_exchange();             // minimizer hooks
+  void min_pre_neighbor();
+  void min_pre_force(int);
+  void min_post_force(int);
+
+  double min_energy(double *);         // minimizer extra-dof interface
+  void min_store();
+  void min_step(double, double *);
+  void min_clearstore();
+  void min_pushstore();
+  void min_popstore();
+  double max_alpha(double *);
+  int min_dof();
+  int min_reset_ref();
+
+ protected:
+  class AtomKokkos *atomKK;            // set in the constructor from atom
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/neigh_full_kokkos.h b/src/KOKKOS/neigh_full_kokkos.h
new file mode 100644
index 0000000000..9112e5049a
--- /dev/null
+++ b/src/KOKKOS/neigh_full_kokkos.h
@@ -0,0 +1,507 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+// Bins all atoms, then fills `list` for the first nall atoms; each phase is retried with larger storage while its resize flag is set.
+template<class DeviceType, int HALF_NEIGH>
+void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
+{
+  const int nall = includegroup?atom->nfirst:atom->nlocal;
+  list->grow(nall);
+  // gather the views and bin geometry used by the functors below into one object
+  NeighborKokkosExecute<DeviceType> 
+    data(*list,
+         k_cutneighsq.view<DeviceType>(),
+         k_bincount.view<DeviceType>(),
+         k_bins.view<DeviceType>(),nall,
+         atomKK->k_x.view<DeviceType>(),
+         atomKK->k_type.view<DeviceType>(),
+         atomKK->k_mask.view<DeviceType>(),
+         atomKK->k_molecule.view<DeviceType>(),
+         nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
+         bininvx,bininvy,bininvz,
+         bboxhi,bboxlo);
+  // sync the cutoff table and atom data, and copy the host stencil into the device stencil
+  k_cutneighsq.sync<DeviceType>();
+  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
+  Kokkos::deep_copy(list->d_stencil,list->h_stencil);
+  // phase 1: binning; entering this loop relies on h_resize() starting > 0 (set outside this function — confirm)
+  while(data.h_resize() > 0) {
+    data.h_resize() = 0;
+    deep_copy(data.resize, data.h_resize);
+    // run MemsetZeroFunctor over the mbins entries behind the bincount pointer
+    MemsetZeroFunctor<DeviceType> f_zero;
+    f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
+    Kokkos::parallel_for(mbins, f_zero);
+    DeviceType::fence();
+    // run the binning functor once per local and ghost atom
+    NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
+
+    Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
+    DeviceType::fence();
+    // read the resize flag back; if set, widen every bin by 16 slots and repeat the binning
+    deep_copy(data.h_resize, data.resize);
+    if(data.h_resize()) {
+
+      atoms_per_bin += 16;
+      k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
+      data.bins = k_bins.view<DeviceType>();
+      data.c_bins = data.bins;
+    }
+  }
+  // make sure the neighbor and count views have at least nall rows (allocated with a 1.1 factor)
+  if(list->d_neighbors.dimension_0()<nall) {
+    list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
+    list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
+    data.neigh_list.d_neighbors = list->d_neighbors;
+    data.neigh_list.d_numneigh = list->d_numneigh;
+  }
+  data.h_resize()=1;  // force at least one pass of the build loop
+  while(data.h_resize()) {
+    data.h_new_maxneighs() = list->maxneighs;
+  data.h_resize() = 0;
+  // phase 2: build; publish the cleared flag and the current row capacity before launching
+  Kokkos::deep_copy(data.resize, data.h_resize);
+  Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
+#if DEVICE==2
+    #define BINS_PER_BLOCK 2
+    const int factor = atoms_per_bin<64?2:1;
+    Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
+#else
+    const int factor = 1;
+#endif
+  // the third functor template argument carries newton_pair as a compile-time 1 or 0
+if(newton_pair) {
+  NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+#if DEVICE==2
+  Kokkos::parallel_for(config, f);
+#else
+  Kokkos::parallel_for(nall, f);
+#endif
+} else {
+  NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+#if DEVICE==2
+  Kokkos::parallel_for(config, f);
+#else
+  Kokkos::parallel_for(nall, f);
+#endif
+}
+  DeviceType::fence();
+    deep_copy(data.h_resize, data.resize);
+    // overflow reported: reallocate the neighbor view with 1.2x the reported count, then loop again
+    if(data.h_resize()) {
+      deep_copy(data.h_new_maxneighs, data.new_maxneighs);
+      list->maxneighs = data.h_new_maxneighs() * 1.2;
+      list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
+      data.neigh_list.d_neighbors = list->d_neighbors;
+      data.neigh_list.maxneighs = list->maxneighs;
+    }
+  }
+
+  list->inum = nall;
+  list->gnum = 0;
+
+}
+
+/* ---------------------------------------------------------------------- */
+// Place atom i into its bin, or raise the resize flag when that bin has no free slot left.
+template<class Device>
+KOKKOS_INLINE_FUNCTION
+void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
+{
+  // bin index for the coordinates of atom i, as computed by coord2bin()
+  const int bin_of_i = coord2bin(x(i, 0), x(i, 1), x(i, 2));
+  // atomically bump the bin's counter; the value fetched is used as this atom's slot
+  const int slot = Kokkos::atomic_fetch_add(&bincount[bin_of_i], (int)1);
+  // slot beyond the second extent of bins: request larger bins instead of writing
+  if(slot >= bins.dimension_1()) { resize() = 1; return; }
+
+  bins(bin_of_i, slot) = i;
+}
+
+/* ---------------------------------------------------------------------- */
+// Per-atom build: stores in row i each atom j that survives the skip rules and satisfies rsq <= cutneighsq(itype,jtype).
+template<class Device> template<int HalfNeigh,int GhostNewton>
+void NeighborKokkosExecute<Device>::
+   build_Item(const int &i) const
+{
+  /* if necessary, goto next page and add pages */
+  int n = 0;
+  // n counts every accepted neighbor, including those that no longer fit in the row
+  // get subview of neighbors of i
+
+  const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+  const int itype = type(i);
+
+  const int ibin = coord2bin(xtmp, ytmp, ztmp);
+
+  const int nstencil = neigh_list.nstencil;
+  const typename ArrayTypes<Device>::t_int_1d_const_um stencil
+    = neigh_list.d_stencil;
+
+  // half lists only: the for-loop below is the body of this if and scans atom i's own bin
+  if(HalfNeigh)
+  for(int m = 0; m < c_bincount(ibin); m++) {
+    const int j = c_bins(ibin,m);
+  // printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
+    const int jtype = type(j);
+    //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
+    if((j == i) || (HalfNeigh && !GhostNewton && (j < i))  ||
+        (HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
+                                       ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
+                                        (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
+      ) continue;
+    //if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+
+    // j is written only while the row has room, but n advances regardless
+    const X_FLOAT delx = xtmp - x(j, 0);
+    const X_FLOAT dely = ytmp - x(j, 1);
+    const X_FLOAT delz = ztmp - x(j, 2);
+    const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+    if(rsq <= cutneighsq(itype,jtype)) {
+      if(n<neigh_list.maxneighs) neighbors_i(n) = j;
+      n++;
+    }
+  }
+  // loop over all bins reached by adding a stencil entry to ibin
+  for(int k = 0; k < nstencil; k++) {
+    const int jbin = ibin + stencil[k];
+    // half list without ghost newton: ibin was already scanned above, so skip it here
+    if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
+    //const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
+      for(int m = 0; m < c_bincount(jbin); m++) {
+        const int j = c_bins(jbin,m);
+        //if(i==0)
+        //printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
+        const int jtype = type(j);
+
+        if(HalfNeigh && !GhostNewton && (j < i)) continue;
+        if(!HalfNeigh && j==i) continue;
+        //if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+
+        const X_FLOAT delx = xtmp - x(j, 0);
+        const X_FLOAT dely = ytmp - x(j, 1);
+        const X_FLOAT delz = ztmp - x(j, 2);
+        const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+      //if(i==0)
+        //printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
+
+        if(rsq <= cutneighsq(itype,jtype)) {
+          if(n<neigh_list.maxneighs) neighbors_i(n) = j;
+          n++;
+        }
+
+      }
+  }
+
+  neigh_list.d_numneigh(i) = n;
+  // a count that reaches the row capacity raises the resize flag and reports the count
+  if(n >= neigh_list.maxneighs) {
+    resize() = 1;
+
+    if(n >= new_maxneighs()) new_maxneighs() = n;  // NOTE(review): non-atomic update of a value every work item can write — confirm this is acceptable
+  }
+  neigh_list.d_ilist(i) = i;
+}
+
+#if DEVICE==2
+extern __shared__ X_FLOAT sharedmem[];
+// dynamically sized shared-memory scratch; each bin handled by a block uses 5*atoms_per_bin X_FLOAT slots of it
+/* ---------------------------------------------------------------------- */
+// CUDA build: one thread per bin slot; candidate atoms are staged through shared memory one bin at a time.
+template<class DeviceType> template<int HalfNeigh>
+__device__ inline
+void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
+{
+  /* loop over atoms in i's bin,
+  */
+  const int atoms_per_bin = c_bins.dimension_1();
+  const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
+  const int MY_BIN = threadIdx.x/atoms_per_bin;
+  const int MY_II = threadIdx.x%atoms_per_bin;
+
+  const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
+  // NOTE(review): this return sits before the barriers below — confirm every thread of a block takes it together
+  if(ibin >=c_bincount.dimension_0()) return;
+  X_FLOAT* other_x = sharedmem;
+  other_x = other_x + 5*atoms_per_bin*MY_BIN;
+  // per-bin layout: x | y | z | type, atoms_per_bin X_FLOATs each, then atom ids as ints in the fifth segment
+  int* other_id = (int*) &other_x[4 * atoms_per_bin];
+
+  int bincount_current = c_bincount[ibin];
+
+  const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
+  /* if necessary, goto next page and add pages */
+
+  int n = 0;
+
+  X_FLOAT xtmp;
+  X_FLOAT ytmp;
+  X_FLOAT ztmp;
+  int itype;
+  const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
+  // threads without a local atom get row 0 here but never write through it (all writes are guarded by i < nlocal)
+  if(i >= 0) {
+    xtmp = x(i, 0);
+    ytmp = x(i, 1);
+    ztmp = x(i, 2);
+    itype = type(i);
+    other_x[MY_II] = xtmp;
+    other_x[MY_II + atoms_per_bin] = ytmp;
+    other_x[MY_II + 2 * atoms_per_bin] = ztmp;
+    other_x[MY_II + 3 * atoms_per_bin] = itype;  // int type stored in an X_FLOAT slot
+  }
+  other_id[MY_II] = i;
+  int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
+  // leave when no thread of the block passed the predicate; NOTE(review): it uses i <= nlocal, other tests use i < nlocal
+  if(test) return;
+
+  if(i >= 0 && i < nlocal) {
+    #pragma unroll 4
+    for(int m = 0; m < bincount_current; m++) {
+      int j = other_id[m];
+
+      //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
+      //if(j==i) continue;
+      if((j == i) || (HalfNeigh && (j < i)))  continue;
+
+      const X_FLOAT delx = xtmp - other_x[m];
+      const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
+      const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
+      const int jtype = other_x[m + 3 * atoms_per_bin];
+      const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+      if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
+    }
+  }
+  __syncthreads();
+  // now walk the stencil bins, reloading the shared buffers for each one
+  const int nstencil = neigh_list.nstencil;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
+    = neigh_list.d_stencil;
+  for(int k = 0; k < nstencil; k++) {
+    const int jbin = ibin + stencil[k];
+
+    if(ibin == jbin) continue;  // own bin was processed above
+
+    bincount_current = c_bincount[jbin];
+    int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
+
+    if(j >= 0) {
+      other_x[MY_II] = x(j, 0);
+      other_x[MY_II + atoms_per_bin] = x(j, 1);
+      other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
+      other_x[MY_II + 3 * atoms_per_bin] = type(j);
+     }
+
+    other_id[MY_II] = j;
+    // barrier: the staged bin must be fully written before any thread reads it
+    __syncthreads();
+
+    if(i >= 0 && i < nlocal) {
+      #pragma unroll 8
+      for(int m = 0; m < bincount_current; m++) {
+        const int j = other_id[m];
+
+        if(HalfNeigh && (j < i))  continue;
+
+        const X_FLOAT delx = xtmp - other_x[m];
+        const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
+        const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
+        const int jtype = other_x[m + 3 * atoms_per_bin];
+        const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+        if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
+      }
+    }
+    __syncthreads();  // all reads finished before the next bin overwrites the buffers
+  }
+
+  if(i >= 0 && i < nlocal) {
+    neigh_list.d_numneigh(i) = n;
+    neigh_list.d_ilist(i) = i;
+  }
+  // n never exceeds maxneighs in this kernel, so equality marks a full (possibly truncated) row
+  if(n >= neigh_list.maxneighs) {
+    resize() = 1;
+
+    if(n >= new_maxneighs()) new_maxneighs() = n;
+  }
+}
+#endif
+// Cluster variant of the full build: same bin / build / resize loops, but launches NeighborClusterKokkosBuildFunctor.
+template<class DeviceType>
+void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
+{
+  const int nall = includegroup?atom->nfirst:atom->nlocal;
+  list->grow(nall);
+  // gather the views and bin geometry used by the functors below into one object
+  NeighborKokkosExecute<DeviceType>
+    data(*list,
+         k_cutneighsq.view<DeviceType>(),
+         k_bincount.view<DeviceType>(),
+         k_bins.view<DeviceType>(),nall,
+         atomKK->k_x.view<DeviceType>(),
+         atomKK->k_type.view<DeviceType>(),
+         atomKK->k_mask.view<DeviceType>(),
+         atomKK->k_molecule.view<DeviceType>(),
+         nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
+         bininvx,bininvy,bininvz,
+         bboxhi,bboxlo);
+  // sync the cutoff table and atom data, and copy the host stencil into the device stencil
+  k_cutneighsq.sync<DeviceType>();
+  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
+  Kokkos::deep_copy(list->d_stencil,list->h_stencil);
+  DeviceType::fence();
+  // phase 1: binning; entering this loop relies on h_resize() starting > 0 (set outside this function — confirm)
+  while(data.h_resize() > 0) {
+    data.h_resize() = 0;
+    deep_copy(data.resize, data.h_resize);
+    // run MemsetZeroFunctor over the mbins entries behind the bincount pointer
+    MemsetZeroFunctor<DeviceType> f_zero;
+    f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
+    Kokkos::parallel_for(mbins, f_zero);
+    DeviceType::fence();
+    // run the binning functor once per local and ghost atom
+    NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
+
+    Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
+    DeviceType::fence();
+    // read the resize flag back; if set, widen every bin by 16 slots and repeat the binning
+    deep_copy(data.h_resize, data.resize);
+    if(data.h_resize()) {
+
+      atoms_per_bin += 16;
+      k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
+      data.bins = k_bins.view<DeviceType>();
+      data.c_bins = data.bins;
+    }
+  }
+  // make sure the neighbor and count views have at least nall rows (allocated with a 1.1 factor)
+  if(list->d_neighbors.dimension_0()<nall) {
+    list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
+    list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
+    data.neigh_list.d_neighbors = list->d_neighbors;
+    data.neigh_list.d_numneigh = list->d_numneigh;
+  }
+  data.h_resize()=1;  // force at least one pass of the build loop
+  while(data.h_resize()) {
+    data.h_new_maxneighs() = list->maxneighs;
+  data.h_resize() = 0;
+  // phase 2: build; publish the cleared flag and the current row capacity before launching
+  Kokkos::deep_copy(data.resize, data.h_resize);
+  Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
+#if DEVICE==2
+    #define BINS_PER_BLOCK 2
+    const int factor = atoms_per_bin<64?2:1;
+    Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
+#else
+    const int factor = 1;
+#endif
+  // NOTE(review): both newton_pair branches below are identical, and `config` is unused since its launches are commented out
+if(newton_pair) {
+  NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+//#if DEVICE==2
+//  Kokkos::parallel_for(config, f);
+//#else
+  Kokkos::parallel_for(nall, f);
+//#endif
+} else {
+  NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+//#if DEVICE==2
+//  Kokkos::parallel_for(config, f);
+//#else
+  Kokkos::parallel_for(nall, f);
+//#endif
+}
+  DeviceType::fence();
+    deep_copy(data.h_resize, data.resize);
+    // overflow reported: reallocate the neighbor view with 1.2x the reported count, then loop again
+    if(data.h_resize()) {
+      deep_copy(data.h_new_maxneighs, data.new_maxneighs);
+      list->maxneighs = data.h_new_maxneighs() * 1.2;
+      list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
+      data.neigh_list.d_neighbors = list->d_neighbors;
+      data.neigh_list.maxneighs = list->maxneighs;
+    }
+  }
+
+  list->inum = nall;
+  list->gnum = 0;
+
+}
+
+/* ---------------------------------------------------------------------- */
+// Per-atom cluster build: row i collects, once each, the value j-(j%ClusterSize) for every in-range atom j.
+template<class Device> template<int ClusterSize>
+void NeighborKokkosExecute<Device>::
+   build_cluster_Item(const int &i) const
+{
+  // n counts accepted clusters, including any that no longer fit in the row
+  int n = 0;
+
+  // row of the neighbor list that belongs to atom i
+
+  const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+  const int itype = type(i);
+
+  const int ibin = coord2bin(xtmp, ytmp, ztmp);
+
+  const int nstencil = neigh_list.nstencil;
+  const typename ArrayTypes<Device>::t_int_1d_const_um stencil
+    = neigh_list.d_stencil;
+
+  for(int k = 0; k < nstencil; k++) {
+    const int jbin = ibin + stencil[k];
+      for(int m = 0; m < c_bincount(jbin); m++) {
+        const int j = c_bins(jbin,m);
+        const int jcluster = j-(j%ClusterSize);  // value stored for j's cluster
+        const int nstored = n<neigh_list.maxneighs?n:neigh_list.maxneighs;  // entries actually written so far
+        bool skip = i == j;
+        for(int s = 0; !skip && s < nstored; s++) skip = (jcluster == neighbors_i(s));  // already listed? stop at the first hit
+        if(!skip) {
+          const int jtype = type(j);
+
+          const X_FLOAT delx = xtmp - x(j, 0);
+          const X_FLOAT dely = ytmp - x(j, 1);
+          const X_FLOAT delz = ztmp - x(j, 2);
+          const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+          if(rsq <= cutneighsq(itype,jtype)) {
+            if(n<neigh_list.maxneighs) neighbors_i(n) = jcluster;
+            n++;
+            // n advances even when the row is full so the overflow is detected below
+          }
+        }
+
+      }
+  }
+
+  neigh_list.d_numneigh(i) = n;
+
+  if(n >= neigh_list.maxneighs) {
+    resize() = 1;
+
+    if(n >= new_maxneighs()) new_maxneighs() = n;
+  }
+  neigh_list.d_ilist(i) = i;
+}
diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp
new file mode 100644
index 0000000000..dbb0aa5727
--- /dev/null
+++ b/src/KOKKOS/neigh_list_kokkos.cpp
@@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "neigh_list_kokkos.h"
+#include "atom.h"
+#include "memory.h"
+
+using namespace LAMMPS_NS;
+
+enum{NSQ,BIN,MULTI};     // same enumerators, in the same order, as the enum in neighbor_kokkos.cpp
+
+/* ---------------------------------------------------------------------- */
+// Reset the inherited pointer members to NULL and the listed counters/flags to 0; nothing is freed here.
+template<class Device>
+void NeighListKokkos<Device>::clean_copy()
+{
+  // integer counters and flags
+  dnum = 0; ghostflag = 0;
+  maxstencil = 0; maxstencil_multi = 0;
+  // per-atom list pointers
+  ilist = NULL;
+  numneigh = NULL;
+  firstneigh = NULL;
+  firstdouble = NULL;
+  // remaining pointer members: iskip/ijskip and ipage/dpage
+  iskip = NULL;
+  ijskip = NULL;
+  ipage = NULL;
+  dpage = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+// Enlarge the per-atom storage of this list to hold nmax atoms; returns at once when maxatoms already covers nmax.
+template<class Device>
+void NeighListKokkos<Device>::grow(int nmax)
+{
+  // nothing to do when the current capacity is already sufficient
+
+  if (nmax <= maxatoms) return;
+  maxatoms = nmax;
+
+  // newly constructed views with maxatoms rows replace the previous ones
+  typedef ArrayTypes<Device> AT;
+  d_ilist = typename AT::t_int_1d("neighlist:ilist",maxatoms);
+  d_numneigh = typename AT::t_int_1d("neighlist:numneigh",maxatoms);
+  d_neighbors = typename AT::t_neighbors_2d("neighlist:neighbors",
+                                            maxatoms,maxneighs);
+
+  // host pointer tables: release the old ones, then allocate maxatoms entries (firstdouble only if dnum is set)
+  memory->sfree(firstneigh);
+  memory->sfree(firstdouble);
+  firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *),
+                                        "neighlist:firstneigh");
+  if (dnum) {
+    firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *),
+                                              "neighlist:firstdouble");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+// Make room for a stencil of smax entries: one shared stencil for style BIN, per-type arrays for any other style.
+template<class Device>
+void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
+{
+  if (style == BIN) {
+    // single stencil: reallocate only when the request exceeds the current capacity
+    if (smax <= maxstencil) return;
+    maxstencil = smax;
+    d_stencil = 
+      memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
+                            "neighlist:stencil");
+    if (ghostflag) {
+      memory->destroy(stencilxyz);
+      memory->create(stencilxyz,maxstencil,3,"neighlist:stencilxyz");
+    }
+    return;
+  }
+  // every other style: arrays indexed by atom type, entries 1..ntypes are used
+  const int ntypes = atom->ntypes;
+  // first call (capacity still 0): create the per-type tables with empty entries
+  if (maxstencil_multi == 0) {
+    nstencil_multi = new int[ntypes+1];
+    stencil_multi = new int*[ntypes+1];
+    distsq_multi = new double*[ntypes+1];
+    for (int t = 1; t <= ntypes; t++) {
+      nstencil_multi[t] = 0;
+      stencil_multi[t] = NULL;
+      distsq_multi[t] = NULL;
+    }
+  }
+
+  // recreate every per-type array when the request exceeds the current capacity
+  if (smax <= maxstencil_multi) return;
+  maxstencil_multi = smax;
+  for (int t = 1; t <= ntypes; t++) {
+    memory->destroy(stencil_multi[t]);
+    memory->destroy(distsq_multi[t]);
+    memory->create(stencil_multi[t],maxstencil_multi,
+                   "neighlist:stencil_multi");
+    memory->create(distsq_multi[t],maxstencil_multi,
+                   "neighlist:distsq_multi");
+  }
+}
+
+template class NeighListKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#if DEVICE==2
+template class NeighListKokkos<LMPHostType>;  // additional instantiation for the host type, only when DEVICE==2
+#endif
diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h
new file mode 100644
index 0000000000..fd4ac3acc9
--- /dev/null
+++ b/src/KOKKOS/neigh_list_kokkos.h
@@ -0,0 +1,104 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_NEIGH_LIST_KOKKOS_H
+#define LMP_NEIGH_LIST_KOKKOS_H
+
+#include "pointers.h"
+#include "neigh_list.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
+
+// Writable accessor for one atom's neighbor entries: element i lives at firstneigh[i*stride].
+class AtomNeighbors
+{
+ public:
+  const int num_neighs;    // count supplied at construction
+
+  KOKKOS_INLINE_FUNCTION
+  AtomNeighbors(int* const & firstneigh, const int & _num_neighs,
+                const int & stride)
+    : num_neighs(_num_neighs), _firstneigh(firstneigh), _stride(stride) {}
+
+  KOKKOS_INLINE_FUNCTION
+  int& operator()(const int &i) const { return *(_firstneigh + i*_stride); }
+
+ private:
+  int* const _firstneigh;  // address of the first entry
+  const int _stride;       // distance, in ints, between consecutive entries
+};
+
+// Read-only accessor for one atom's neighbor entries: element i is read from firstneigh[i*stride].
+class AtomNeighborsConst
+{
+ public:
+  const int* const _firstneigh;  // address of the first entry (public in this class)
+  const int numneigh;            // count supplied at construction
+
+  KOKKOS_INLINE_FUNCTION
+  AtomNeighborsConst(int* const & firstneigh, const int & _numneigh,
+                     const int & stride)
+    : _firstneigh(firstneigh), numneigh(_numneigh), _stride(stride) {}
+
+  KOKKOS_INLINE_FUNCTION
+  const int& operator()(const int &i) const { return *(_firstneigh + i*_stride); }
+
+ private:
+  // the data pointer is declared in the public section above
+  const int _stride;             // distance, in ints, between consecutive entries
+};
+
+template<class Device>
+class NeighListKokkos: public NeighList {  // NeighList that additionally keeps its data in Kokkos views for Device
+  int _stride;  // set to 1 by the constructor
+
+public:
+  int maxneighs;  // row capacity used when d_neighbors is allocated; the constructor starts it at 16
+
+  void clean_copy();
+  void grow(int nmax);
+  typename ArrayTypes<Device>::t_neighbors_2d d_neighbors;  // 2d view, one row of neighbor indices per atom
+  typename ArrayTypes<Device>::t_int_1d d_ilist;   // local indices of I atoms
+  typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
+  typename ArrayTypes<Device>::t_int_1d d_stencil;  // stencil on Device: offsets added to a bin index
+  typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // host view of the same stencil
+  // ctor sets the two defaults; dtor only NULLs three inherited pointers (presumably so ~NeighList skips them — confirm)
+  NeighListKokkos(class LAMMPS *lmp):
+  NeighList(lmp) {_stride = 1; maxneighs = 16;};
+  ~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
+  // accessors for row i: first entry, d_numneigh(i), and the pointer distance between columns 0 and 1 as stride
+  KOKKOS_INLINE_FUNCTION
+  AtomNeighbors get_neighbors(const int &i) const {
+    return AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
+                         &d_neighbors(i,1)-&d_neighbors(i,0));
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  AtomNeighborsConst get_neighbors_const(const int &i) const {
+    return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),
+                              &d_neighbors(i,1)-&d_neighbors(i,0));
+  }
+  // writable reference to the stored neighbor count of atom i
+  KOKKOS_INLINE_FUNCTION
+  int& num_neighs(const int & i) const {
+    return d_numneigh(i);
+  }
+  void stencil_allocate(int smax, int style);
+};
+
+}
+
+#endif
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
new file mode 100644
index 0000000000..adea823976
--- /dev/null
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -0,0 +1,269 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "neighbor_kokkos.h"
+#include "atom.h"
+#include "pair.h"
+#include "neigh_request.h"
+#include "memory.h"
+
+using namespace LAMMPS_NS;
+
+enum{NSQ,BIN,MULTI};     // also in neigh_list.cpp
+
+/* ---------------------------------------------------------------------- */
+// Start with 16 slots per bin and with no host or device lists or function tables allocated.
+NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp)
+{
+  // initial second extent used when the bins array is allocated
+  atoms_per_bin = 16;
+
+  // host-side bookkeeping starts out empty
+  nlist_host = 0; lists_host = NULL;
+  pair_build_host = NULL; stencil_create_host = NULL;
+
+  // device-side bookkeeping starts out empty
+  nlist_device = 0; lists_device = NULL;
+  pair_build_device = NULL; stencil_create_device = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+// Release the Kokkos cutoff table, the host/device lists, and all four per-request function-pointer tables.
+NeighborKokkos::~NeighborKokkos()
+{
+  memory->destroy_kokkos(k_cutneighsq,cutneighsq);
+  cutneighsq = NULL;  // cleared after release (presumably so the base-class destructor does not free it again — confirm)
+  for (int i = 0; i < nlist_host; i++) delete lists_host[i];
+  delete [] lists_host;
+  for (int i = 0; i < nlist_device; i++) delete lists_device[i];
+  delete [] lists_device;
+  delete [] pair_build_device;
+  delete [] pair_build_host;
+  delete [] stencil_create_device;  // new[]-allocated in init_lists_kokkos(); was never released here before
+  delete [] stencil_create_host;    // same; both are NULL if init_lists_kokkos() never ran
+}
+
+/* ---------------------------------------------------------------------- */
+// Store the atom pointer, cast to AtomKokkos*, in atomKK and then run the base-class init().
+void NeighborKokkos::init()
+{
+  atomKK = (AtomKokkos *) atom;
+  Neighbor::init();
+}
+
+/* ---------------------------------------------------------------------- */
+// Create the (n+1) x (n+1) cutneighsq table via create_kokkos and flag its host side as modified.
+void NeighborKokkos::init_cutneighsq_kokkos(int n)
+{
+  memory->create_kokkos(k_cutneighsq,cutneighsq,n+1,n+1,"neigh:cutneighsq");
+  k_cutneighsq.modify<LMPHostType>();
+}
+
+/* ---------------------------------------------------------------------- */
+// Rebuild the host and device list tables from the current requests; returns the number of non-Kokkos lists.
+int NeighborKokkos::init_lists_kokkos()
+{
+  // discard the host-side lists and function tables left by a previous call
+  // NOTE(review): only indices below nlist_host are deleted although lists are stored by request index — confirm none leak
+  for (int r = 0; r < nlist_host; r++) delete lists_host[r];
+  delete [] lists_host;
+  delete [] pair_build_host;
+  delete [] stencil_create_host;
+  nlist_host = 0;
+  // same for the device side
+  for (int r = 0; r < nlist_device; r++) delete lists_device[r];
+  delete [] lists_device;
+  delete [] pair_build_device;
+  delete [] stencil_create_device;
+  nlist_device = 0;
+  // count requests per kind: device flag checked first, then host flag, otherwise non-Kokkos
+  nlist = 0;
+  for (int r = 0; r < nrequest; r++) {
+    if (requests[r]->kokkos_device) nlist_device++;
+    else if (requests[r]->kokkos_host) nlist_host++;
+    else nlist++;
+  }
+
+  // host tables: one slot per request, left NULL unless the request has kokkos_host set
+  lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
+  pair_build_host = new PairPtrHost[nrequest];
+  stencil_create_host = new StencilPtrHost[nrequest];
+  for (int r = 0; r < nrequest; r++) {
+    lists_host[r] = NULL;
+    pair_build_host[r] = NULL;
+    stencil_create_host[r] = NULL;
+  }
+  for (int r = 0; r < nrequest; r++) {
+    if (requests[r]->kokkos_host) {
+      NeighListKokkos<LMPHostType> *hlist = new NeighListKokkos<LMPHostType>(lmp);
+      lists_host[r] = hlist;
+      hlist->index = r;
+      hlist->dnum = requests[r]->dnum;
+      if (requests[r]->pair)
+        ((Pair *) requests[r]->requestor)->init_list(requests[r]->id,hlist);
+    }
+  }
+
+  // device tables: one slot per request, left NULL unless the request has kokkos_device set
+  lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
+  pair_build_device = new PairPtrDevice[nrequest];
+  stencil_create_device = new StencilPtrDevice[nrequest];
+  for (int r = 0; r < nrequest; r++) {
+    lists_device[r] = NULL;
+    pair_build_device[r] = NULL;
+    stencil_create_device[r] = NULL;
+  }
+  for (int r = 0; r < nrequest; r++) {
+    if (requests[r]->kokkos_device) {
+      NeighListKokkos<LMPDeviceType> *dlist = new NeighListKokkos<LMPDeviceType>(lmp);
+      lists_device[r] = dlist;
+      dlist->index = r;
+      dlist->dnum = requests[r]->dnum;
+      if (requests[r]->pair)
+        ((Pair *) requests[r]->requestor)->init_list(requests[r]->id,dlist);
+    }
+  }
+
+  // the result is the count of requests that are neither device nor host Kokkos lists
+
+  return nlist;
+}
+
+/* ---------------------------------------------------------------------- */
+// Derive the build/grow/stencil/ghost flags of the host and/or device list stored for request i.
+void NeighborKokkos::init_list_flags1_kokkos(int i)
+{
+  if (lists_host[i]) {
+    // buildflag: 1 only when a build function is registered and the request is not occasional
+    const bool build = (pair_build_host[i] != NULL) && !requests[i]->occasional;
+    lists_host[i]->buildflag = build ? 1 : 0;
+
+    // growflag: 1 unless the request is a copy
+    lists_host[i]->growflag = requests[i]->copy ? 0 : 1;
+
+    // stencilflag: 1 unless the style is NSQ or no stencil function is registered
+    const bool need_stencil = (style != NSQ) && (stencil_create[i] != NULL);
+    lists_host[i]->stencilflag = need_stencil ? 1 : 0;
+
+    // ghostflag mirrors the request; a non-occasional ghost request also sets anyghostlist
+    lists_host[i]->ghostflag = requests[i]->ghost ? 1 : 0;
+    if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
+  }
+
+  if (lists_device[i]) {
+    // buildflag: 1 only when a build function is registered and the request is not occasional
+    const bool build = (pair_build_device[i] != NULL) && !requests[i]->occasional;
+    lists_device[i]->buildflag = build ? 1 : 0;
+
+    // growflag: 1 unless the request is a copy
+    lists_device[i]->growflag = requests[i]->copy ? 0 : 1;
+
+    // stencilflag: 1 unless the style is NSQ or no stencil function is registered
+    const bool need_stencil = (style != NSQ) && (stencil_create[i] != NULL);
+    lists_device[i]->stencilflag = need_stencil ? 1 : 0;
+
+    // ghostflag mirrors the request; a non-occasional ghost request also sets anyghostlist
+    lists_device[i]->ghostflag = requests[i]->ghost ? 1 : 0;
+    if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+// Append request index i to blist/glist/slist according to the flags of its host and/or device list.
+void NeighborKokkos::init_list_flags2_kokkos(int i)
+{
+  // host list: build list needs buildflag; grow and stencil lists also need a non-occasional request
+  if (lists_host[i]) {
+    const bool regular = (requests[i]->occasional == 0);
+    if (lists_host[i]->buildflag) blist[nblist++] = i;
+    if (regular && lists_host[i]->growflag) glist[nglist++] = i;
+    if (regular && lists_host[i]->stencilflag) slist[nslist++] = i;
+  }
+
+  // device list: same rules
+  if (lists_device[i]) {
+    const bool regular = (requests[i]->occasional == 0);
+    if (lists_device[i]->buildflag) blist[nblist++] = i;
+    if (regular && lists_device[i]->growflag) glist[nglist++] = i;
+    if (regular && lists_device[i]->stencilflag) slist[nslist++] = i;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+// Grow the host and/or device list of request i to maxatom entries when it exists and its growflag is set.
+void NeighborKokkos::init_list_grow_kokkos(int i)
+{
+  // host list
+  if (lists_host[i] && lists_host[i]->growflag) lists_host[i]->grow(maxatom);
+  // device list
+  if (lists_device[i] && lists_device[i]->growflag) lists_device[i]->grow(maxatom);
+}
+
+/* ---------------------------------------------------------------------- */
+// Record the build function for a Kokkos request in the host or device table; other requests go to Neighbor::choose_build.
+void NeighborKokkos::choose_build(int index, NeighRequest *rq)
+{
+  if (rq->kokkos_host) {
+    // host: full -> HALF_NEIGH=0, half -> HALF_NEIGH=1, anything else leaves the slot NULL
+    if (rq->full)
+      pair_build_host[index] = &NeighborKokkos::full_bin_kokkos<LMPHostType,0>;
+    else if (rq->half)
+      pair_build_host[index] = &NeighborKokkos::full_bin_kokkos<LMPHostType,1>;
+    else pair_build_host[index] = NULL;
+  } else if (rq->kokkos_device) {
+    // device: as above, except that a full request with full_cluster selects the cluster build
+    if (rq->full && rq->full_cluster)
+      pair_build_device[index] = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
+    else if (rq->full)
+      pair_build_device[index] = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0>;
+    else if (rq->half)
+      pair_build_device[index] = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1>;
+    else pair_build_device[index] = NULL;
+  } else {
+    Neighbor::choose_build(index,rq);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+// Invoke the stored build function for entry i of blist on its host list, or else on its device list.
+void NeighborKokkos::build_kokkos(int i)
+{
+  // request index held in slot i of the build list
+  const int m = blist[i];
+  if (lists_host[m]) (this->*pair_build_host[m])(lists_host[m]);
+  else if (lists_device[m]) (this->*pair_build_device[m])(lists_device[m]);
+}
+
+/* ---------------------------------------------------------------------- */
+// Allocate and create the stencil for entry i of slist; on the last entry also enlarge the bin arrays if needed.
+void NeighborKokkos::setup_bins_kokkos(int i)
+{
+  const int m = slist[i];
+  if (lists_host[m]) {
+    lists_host[m]->stencil_allocate(smax,style);
+    (this->*stencil_create[m])(lists_host[m],sx,sy,sz);
+  } else if (lists_device[m]) {
+    lists_device[m]->stencil_allocate(smax,style);
+    (this->*stencil_create[m])(lists_device[m],sx,sy,sz);
+  }
+
+  // bin arrays are reallocated only on the call for the last stencil list, and only when maxhead outgrew them
+  if (i >= nslist-1 && maxhead > k_bins.d_view.dimension_0()) {
+    k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
+    k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
+  }
+}
+
+// include to trigger instantiation of templated functions
+
+#include "neigh_full_kokkos.h"
diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h
new file mode 100644
index 0000000000..30e73792e4
--- /dev/null
+++ b/src/KOKKOS/neighbor_kokkos.h
@@ -0,0 +1,257 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_NEIGHBOR_KOKKOS_H
+#define LMP_NEIGHBOR_KOKKOS_H
+
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template<class Device>
+class NeighborKokkosExecute
+{
+  typedef ArrayTypes<Device> AT;
+
+ public:
+  NeighListKokkos<Device> neigh_list;
+  const typename AT::t_xfloat_2d_randomread cutneighsq;
+  const typename AT::t_int_1d bincount;
+  const typename AT::t_int_1d_const c_bincount;
+  typename AT::t_int_2d bins;
+  typename AT::t_int_2d_const c_bins;
+  const typename AT::t_x_array_randomread x;
+  const typename AT::t_int_1d_const type,mask,molecule;
+
+  // bin-grid description read by coord2bin()
+  const int nbinx,nbiny,nbinz;
+  const int mbinx,mbiny,mbinz;
+  const int mbinxlo,mbinylo,mbinzlo;
+  const X_FLOAT bininvx,bininvy,bininvz;
+  X_FLOAT bboxhi[3],bboxlo[3];
+
+  const int nlocal;
+
+  // scalar views and the host-side handles the constructor attaches to them
+  typename AT::t_int_scalar resize;
+  typename AT::t_int_scalar new_maxneighs;
+  typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
+  typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
+
+  // Stores every argument in the member of the same name and sets up the
+  // resize / new_maxneighs scalars together with their host-side handles.
+  NeighborKokkosExecute(
+    const NeighListKokkos<Device> &neigh_list_,
+    const typename AT::t_xfloat_2d_randomread &cutneighsq_,
+    const typename AT::t_int_1d &bincount_,
+    const typename AT::t_int_2d &bins_,
+    const int nlocal_,
+    const typename AT::t_x_array_randomread &x_,
+    const typename AT::t_int_1d_const &type_,
+    const typename AT::t_int_1d_const &mask_,
+    const typename AT::t_int_1d_const &molecule_,
+    const int &nbinx_,const int &nbiny_,const int &nbinz_,
+    const int &mbinx_,const int &mbiny_,const int &mbinz_,
+    const int &mbinxlo_,const int &mbinylo_,const int &mbinzlo_,
+    const X_FLOAT &bininvx_,const X_FLOAT &bininvy_,const X_FLOAT &bininvz_,
+    const X_FLOAT *bboxhi_, const X_FLOAT *bboxlo_):
+    // initialisers follow the member declaration order
+    neigh_list(neigh_list_),cutneighsq(cutneighsq_),
+    bincount(bincount_),c_bincount(bincount_),bins(bins_),c_bins(bins_),
+    x(x_),type(type_),mask(mask_),molecule(molecule_),
+    nbinx(nbinx_),nbiny(nbiny_),nbinz(nbinz_),
+    mbinx(mbinx_),mbiny(mbiny_),mbinz(mbinz_),
+    mbinxlo(mbinxlo_),mbinylo(mbinylo_),mbinzlo(mbinzlo_),
+    bininvx(bininvx_),bininvy(bininvy_),bininvz(bininvz_),
+    nlocal(nlocal_)
+  {
+    for (int d = 0; d < 3; d++) {
+      bboxlo[d] = bboxlo_[d];
+      bboxhi[d] = bboxhi_[d];
+    }
+    // host value of resize starts at 1
+    resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
+#ifndef KOKKOS_USE_UVM
+    h_resize = Kokkos::create_mirror_view(resize);
+#else
+    h_resize = resize;
+#endif
+    h_resize() = 1;
+    // host value of new_maxneighs starts at neigh_list.maxneighs
+    new_maxneighs = typename AT::t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
+#ifndef KOKKOS_USE_UVM
+    h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
+#else
+    h_new_maxneighs = new_maxneighs;
+#endif
+    h_new_maxneighs() = neigh_list.maxneighs;
+  }
+
+  ~NeighborKokkosExecute() { neigh_list.clean_copy(); }
+
+  template<int HalfNeigh, int GhostNewton>
+  KOKKOS_FUNCTION
+  void build_Item(const int &i) const;
+
+  template<int ClusterSize>
+  KOKKOS_FUNCTION
+  void build_cluster_Item(const int &i) const;
+
+#if DEVICE==2
+  template<int HalfNeigh>
+  __device__ inline
+  void build_ItemCuda(Device dev) const;
+#endif
+
+  KOKKOS_INLINE_FUNCTION
+  void binatomsItem(const int &i) const;
+
+  // Bin index along one axis for coordinate pos: values at or above hi are
+  // offset by nbin, values in [lo,hi) are capped at nbin-1, and all other
+  // values are shifted down by one.
+  KOKKOS_INLINE_FUNCTION
+  int axis2bin(const X_FLOAT &pos, const X_FLOAT &lo, const X_FLOAT &hi,
+               const X_FLOAT &bininv, const int &nbin) const
+  {
+    if (pos >= hi) return static_cast<int> ((pos-hi)*bininv) + nbin;
+    if (pos >= lo) {
+      const int ibin = static_cast<int> ((pos-lo)*bininv);
+      return MIN(ibin,nbin-1);
+    }
+    return static_cast<int> ((pos-lo)*bininv) - 1;
+  }
+
+  // flat bin index of point (x,y,z), relative to mbin{x,y,z}lo
+  KOKKOS_INLINE_FUNCTION
+  int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
+  {
+    const int ix = axis2bin(x,bboxlo[0],bboxhi[0],bininvx,nbinx);
+    const int iy = axis2bin(y,bboxlo[1],bboxhi[1],bininvy,nbiny);
+    const int iz = axis2bin(z,bboxlo[2],bboxhi[2],bininvz,nbinz);
+    return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
+  }
+};
+
+template<class Device>
+struct NeighborKokkosBinAtomsFunctor {
+  typedef Device device_type;
+
+  // by-value copy of the execute object; operator() forwards to it
+  const NeighborKokkosExecute<Device> c;
+
+  NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &exec)
+    : c(exec) {}
+  ~NeighborKokkosBinAtomsFunctor() {}
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int & i) const { c.binatomsItem(i); }
+};
+
+template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
+struct NeighborKokkosBuildFunctor {
+  typedef Device device_type;
+
+  const NeighborKokkosExecute<Device> c;   // by-value copy used by the kernel
+  const size_t sharedsize;                 // value returned by shmem_size()
+
+  NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &exec,
+                             const size_t shared_size)
+    : c(exec), sharedsize(shared_size) {}
+
+  // index-based entry point: forwards i to build_Item
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int & i) const
+  { c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i); }
+#if DEVICE==2
+  // entry point taking a device handle; compiled only when DEVICE==2
+  KOKKOS_INLINE_FUNCTION
+  void operator() (Device dev) const
+  { c.template build_ItemCuda<HALF_NEIGH>(dev); }
+  size_t shmem_size() const { return sharedsize; }
+#endif
+};
+
+template<class Device,int ClusterSize>
+struct NeighborClusterKokkosBuildFunctor {
+  typedef Device device_type;
+
+  const NeighborKokkosExecute<Device> c;   // by-value copy used by the kernel
+  const size_t sharedsize;                 // stored only; not read in this struct
+
+  NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &exec,
+                                    const size_t shared_size)
+    : c(exec), sharedsize(shared_size) {}
+
+  // forwards index i to the cluster build routine of the execute object
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int & i) const
+  { c.template build_cluster_Item<ClusterSize>(i); }
+};
+
+class NeighborKokkos : public Neighbor {  // Neighbor subclass that keeps Kokkos host/device lists and their build routines
+ public:
+  class AtomKokkos *atomKK;
+
+  int nlist_host;                       // pairwise neighbor lists on Host
+  NeighListKokkos<LMPHostType> **lists_host;  // build_kokkos() uses a non-NULL entry in preference to lists_device
+  int nlist_device;                     // pairwise neighbor lists on Device
+  NeighListKokkos<LMPDeviceType> **lists_device;
+
+  NeighborKokkos(class LAMMPS *);
+  ~NeighborKokkos();
+  void init();
+
+ private:
+  int atoms_per_bin;                    // second extent of k_bins (see setup_bins_kokkos)
+  DAT::tdual_xfloat_2d k_cutneighsq;
+  DAT::tdual_int_1d k_bincount;         // re-created in setup_bins_kokkos together with k_bins
+  DAT::tdual_int_2d k_bins;             // re-created when maxhead exceeds its first extent
+
+  void init_cutneighsq_kokkos(int);
+  int init_lists_kokkos();
+  void init_list_flags1_kokkos(int);
+  void init_list_flags2_kokkos(int);
+  void init_list_grow_kokkos(int);
+  void choose_build(int, NeighRequest *);  // stores a host/device build routine, else defers to Neighbor::choose_build
+  void build_kokkos(int);                  // invokes the stored build routine for list blist[i]
+  void setup_bins_kokkos(int);             // stencil setup for list slist[i]; may re-create the bin views
+  
+  typedef void (NeighborKokkos::*PairPtrHost)
+    (class NeighListKokkos<LMPHostType> *);
+  PairPtrHost *pair_build_host;            // per-list host build routines, written by choose_build
+  typedef void (NeighborKokkos::*PairPtrDevice)
+    (class NeighListKokkos<LMPDeviceType> *);
+  PairPtrDevice *pair_build_device;        // per-list device build routines, written by choose_build
+
+  template<class DeviceType,int HALF_NEIGH>
+  void full_bin_kokkos(NeighListKokkos<DeviceType> *list);  // choose_build passes HALF_NEIGH=0 for full, 1 for half requests
+  template<class DeviceType>
+  void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
+
+  typedef void (NeighborKokkos::*StencilPtrHost)
+    (class NeighListKokkos<LMPHostType> *, int, int, int);
+  StencilPtrHost *stencil_create_host;     // NOTE(review): setup_bins_kokkos calls stencil_create, not this - confirm use
+  typedef void (NeighborKokkos::*StencilPtrDevice)
+    (class NeighListKokkos<LMPDeviceType> *, int, int, int);
+  StencilPtrDevice *stencil_create_device; // NOTE(review): same question as stencil_create_host
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h
new file mode 100644
index 0000000000..de67e7df0b
--- /dev/null
+++ b/src/KOKKOS/pair_kokkos.h
@@ -0,0 +1,655 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+#else
+
+#ifndef LMP_PAIR_KOKKOS_H
+#define LMP_PAIR_KOKKOS_H
+
+#include "Kokkos_Macros.hpp"
+#include "pair.h"
+#include "neigh_list_kokkos.h"
+#include "Kokkos_Vectorization.hpp"
+
+namespace LAMMPS_NS {
+
+template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
+struct PairComputeFunctor  {  // neighbor-list pair kernel instantiated by pair_compute(); FULLCLUSTER and N2 have their own specialisations
+  typedef typename PairStyle::device_type device_type ;
+  typedef EV_FLOAT value_type;  // type accumulated by operator()(i,value_type&)
+
+  PairStyle c;                        // by-value copy of *c_ptr
+  NeighListKokkos<device_type> list;  // by-value copy of *list_ptr
+
+  PairComputeFunctor(PairStyle* c_ptr,
+                          NeighListKokkos<device_type>* list_ptr):
+  c(*c_ptr),list(*list_ptr) {};
+  ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
+
+  KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
+    return j >> SBBITS & 3;  // two bits of j starting at bit SBBITS; indexes c.special_lj
+  }
+
+  // Force on atom list.d_ilist[ii] from its listed neighbors within the cutoff; returns the energy/virial tally.
+  template<int EVFLAG, int NEWTON_PAIR>
+  KOKKOS_FUNCTION
+  EV_FLOAT compute_item(const int& ii,
+                        const NeighListKokkos<device_type> &list) const {
+    EV_FLOAT ev;
+    const int i = list.d_ilist[ii];
+    const X_FLOAT xtmp = c.x(i,0);
+    const X_FLOAT ytmp = c.x(i,1);
+    const X_FLOAT ztmp = c.x(i,2);
+    const int itype = c.type(i);
+
+    const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
+    const int jnum = list.d_numneigh[i];
+
+    F_FLOAT fxtmp = 0.0;
+    F_FLOAT fytmp = 0.0;
+    F_FLOAT fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = neighbors_i(jj);
+      const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
+      j &= NEIGHMASK;  // presumably clears the bits sbmask() just read - NEIGHMASK is defined elsewhere
+      const X_FLOAT delx = xtmp - c.x(j,0);
+      const X_FLOAT dely = ytmp - c.x(j,1);
+      const X_FLOAT delz = ztmp - c.x(j,2);
+      const int jtype = c.type(j);
+      const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+      if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {  // cutoff source chosen by STACKPARAMS
+
+        const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {  // HALFTHREAD: reaction on j added atomically
+          Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
+          Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
+          Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
+        }
+
+        if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {  // HALF: reaction on j added without atomics
+          c.f(j,0) -= delx*fpair;
+          c.f(j,1) -= dely*fpair;
+          c.f(j,2) -= delz*fpair;
+        }
+
+        if (EVFLAG) {
+          if (c.eflag) {  // weight 1.0 under the same condition that applies the force to j above, otherwise 0.5
+            ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
+              factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+            if (c.COUL_FLAG)
+              ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
+                factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+          }
+
+          if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);  // NOTE(review): the eatom update in ev_tally is skipped when no virial flag is set - confirm
+        }
+      }
+
+    }
+    if (NEIGHFLAG == HALFTHREAD) {  // atom i may also be written as a neighbor by other work items, hence atomics
+      Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
+      Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
+      Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
+    } else {
+      c.f(i,0) += fxtmp;
+      c.f(i,1) += fytmp;
+      c.f(i,2) += fztmp;
+    }
+
+    return ev;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+    void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const
+  {
+    const int EFLAG = c.eflag;
+    const int NEWTON_PAIR = c.newton_pair;  // run-time flag here, unlike the template parameter of compute_item
+    const int VFLAG = c.vflag_either;
+
+    if (EFLAG) {
+      if (c.eflag_atom) {
+        const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);  // NOTE(review): ev holds the caller's running totals, not only pair (i,j) - confirm
+        if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
+      }
+    }
+
+    if (VFLAG) {
+      const E_FLOAT v0 = delx*delx*fpair;
+      const E_FLOAT v1 = dely*dely*fpair;
+      const E_FLOAT v2 = delz*delz*fpair;
+      const E_FLOAT v3 = delx*dely*fpair;
+      const E_FLOAT v4 = delx*delz*fpair;
+      const E_FLOAT v5 = dely*delz*fpair;
+
+      if (c.vflag_global) {
+        if (NEIGHFLAG) {  // non-zero NEIGHFLAG; presumably every list style except FULL - confirm enum values
+          if (NEWTON_PAIR) {
+            ev.v[0] += v0;
+            ev.v[1] += v1;
+            ev.v[2] += v2;
+            ev.v[3] += v3;
+            ev.v[4] += v4;
+            ev.v[5] += v5;
+          } else {
+            if (i < c.nlocal) {
+              ev.v[0] += 0.5*v0;
+              ev.v[1] += 0.5*v1;
+              ev.v[2] += 0.5*v2;
+              ev.v[3] += 0.5*v3;
+              ev.v[4] += 0.5*v4;
+              ev.v[5] += 0.5*v5;
+            }
+            if (j < c.nlocal) {
+              ev.v[0] += 0.5*v0;
+              ev.v[1] += 0.5*v1;
+              ev.v[2] += 0.5*v2;
+              ev.v[3] += 0.5*v3;
+              ev.v[4] += 0.5*v4;
+              ev.v[5] += 0.5*v5;
+            }
+          }
+        } else {  // NEIGHFLAG == 0: half of the pair virial per call
+          ev.v[0] += 0.5*v0;
+          ev.v[1] += 0.5*v1;
+          ev.v[2] += 0.5*v2;
+          ev.v[3] += 0.5*v3;
+          ev.v[4] += 0.5*v4;
+          ev.v[5] += 0.5*v5;
+        }
+      }
+
+      if (c.vflag_atom) {
+        if (NEWTON_PAIR || i < c.nlocal) {
+          c.d_vatom(i,0) += 0.5*v0;
+          c.d_vatom(i,1) += 0.5*v1;
+          c.d_vatom(i,2) += 0.5*v2;
+          c.d_vatom(i,3) += 0.5*v3;
+          c.d_vatom(i,4) += 0.5*v4;
+          c.d_vatom(i,5) += 0.5*v5;
+        }
+        if (NEWTON_PAIR || (NEIGHFLAG && j < c.nlocal)) {  // NOTE(review): with NEIGHFLAG == 0 and newton_pair set, j is still tallied - confirm
+        c.d_vatom(j,0) += 0.5*v0;
+        c.d_vatom(j,1) += 0.5*v1;
+        c.d_vatom(j,2) += 0.5*v2;
+        c.d_vatom(j,3) += 0.5*v3;
+        c.d_vatom(j,4) += 0.5*v4;
+        c.d_vatom(j,5) += 0.5*v5;
+        }
+      }
+    }
+  }
+
+  // Entry points: the first discards the tally, the second adds it to energy_virial.
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    if (c.newton_pair) compute_item<0,1>(i,list);
+    else compute_item<0,0>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i, value_type &energy_virial) const {
+    if (c.newton_pair)
+      energy_virial += compute_item<1,1>(i,list);
+    else
+      energy_virial += compute_item<1,0>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(volatile value_type &update) {  // zeroes every field of update
+    update.evdwl = 0;
+    update.ecoul = 0;
+    update.v[0] = 0;
+    update.v[1] = 0;
+    update.v[2] = 0;
+    update.v[3] = 0;
+    update.v[4] = 0;
+    update.v[5] = 0;
+  }
+  KOKKOS_INLINE_FUNCTION 
+  static void join(volatile value_type &update,
+                   const volatile value_type &source) {  // adds source into update field by field
+    update.evdwl += source.evdwl;
+    update.ecoul += source.ecoul;
+    update.v[0] += source.v[0];
+    update.v[1] += source.v[1];
+    update.v[2] += source.v[2];
+    update.v[3] += source.v[3];
+    update.v[4] += source.v[4];
+    update.v[5] += source.v[5];
+  }
+
+
+};
+
+template <class PairStyle, bool STACKPARAMS, class Specialisation>
+struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation>  {  // specialisation for NEIGHFLAG == FULLCLUSTER
+  typedef typename PairStyle::device_type device_type ;
+  typedef Kokkos::Vectorization<device_type,NeighClusterSize> vectorization;
+  typedef EV_FLOAT value_type;  // type accumulated by operator()(dev,value_type&)
+
+  PairStyle c;                        // by-value copy of *c_ptr
+  NeighListKokkos<device_type> list;  // by-value copy of *list_ptr
+
+  PairComputeFunctor(PairStyle* c_ptr,
+                          NeighListKokkos<device_type>* list_ptr):
+  c(*c_ptr),list(*list_ptr) {};
+  ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
+
+  KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
+    return j >> SBBITS & 3;  // two bits of j starting at bit SBBITS; indexes c.special_lj
+  }
+
+  // Force on atom i = vectorization::global_thread_rank(dev); each neighbor entry expands to NeighClusterSize atoms.
+  template<int EVFLAG, int NEWTON_PAIR>
+  KOKKOS_FUNCTION
+  EV_FLOAT compute_item(const device_type& dev,
+                        const NeighListKokkos<device_type> &list) const {
+    EV_FLOAT ev;
+    const int i = vectorization::global_thread_rank(dev);
+
+    const X_FLOAT xtmp = c.c_x(i,0);
+    const X_FLOAT ytmp = c.c_x(i,1);
+    const X_FLOAT ztmp = c.c_x(i,2);
+    const int itype = c.type(i);
+
+    const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
+    const int jnum = list.d_numneigh[i];
+
+    F_FLOAT fxtmp = 0.0;
+    F_FLOAT fytmp = 0.0;
+    F_FLOAT fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      const int jjj = neighbors_i(jj);  // base value of the cluster; members are jjj+k
+
+      for (int k = vectorization::begin(); k<NeighClusterSize; k+=vectorization::increment) {
+        const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
+        const int j = (jjj + k)&NEIGHMASK;
+        if((j==i)||(j>=c.nall)) continue;  // skip self-interaction and indices at or beyond c.nall
+        const X_FLOAT delx = xtmp - c.c_x(j,0);
+        const X_FLOAT dely = ytmp - c.c_x(j,1);
+        const X_FLOAT delz = ztmp - c.c_x(j,2);
+        const int jtype = c.type(j);
+        const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
+
+        if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {  // cutoff source chosen by STACKPARAMS
+
+          const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+          fxtmp += delx*fpair;
+          fytmp += dely*fpair;
+          fztmp += delz*fpair;
+
+          if (EVFLAG) {
+            if (c.eflag) {  // always weighted by 0.5 in this specialisation
+              ev.evdwl += 0.5*
+                factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+              if (c.COUL_FLAG)
+                ev.ecoul += 0.5*
+                  factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+            }
+
+            if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);  // NOTE(review): the eatom update in ev_tally is skipped when no virial flag is set - confirm
+          }
+        }
+      }
+    }
+
+    const F_FLOAT fx = vectorization::reduce(fxtmp);
+    const F_FLOAT fy = vectorization::reduce(fytmp);
+    const F_FLOAT fz = vectorization::reduce(fztmp);
+    if(vectorization::is_lane_0(dev)) {  // only lane 0 writes the reduced force of atom i
+      c.f(i,0) += fx;
+      c.f(i,1) += fy;
+      c.f(i,2) += fz;
+    }
+
+    return ev;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+    void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const
+  {
+    const int EFLAG = c.eflag;
+    const int NEWTON_PAIR = c.newton_pair;  // run-time flag; only used for the eatom conditions below
+    const int VFLAG = c.vflag_either;
+
+    if (EFLAG) {
+      if (c.eflag_atom) {
+        const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);  // NOTE(review): ev holds the caller's running totals, not only pair (i,j) - confirm
+        if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
+        if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
+      }
+    }
+
+    if (VFLAG) {
+      const E_FLOAT v0 = delx*delx*fpair;
+      const E_FLOAT v1 = dely*dely*fpair;
+      const E_FLOAT v2 = delz*delz*fpair;
+      const E_FLOAT v3 = delx*dely*fpair;
+      const E_FLOAT v4 = delx*delz*fpair;
+      const E_FLOAT v5 = dely*delz*fpair;
+
+      if (c.vflag_global) {  // half of the pair virial per call
+          ev.v[0] += 0.5*v0;
+          ev.v[1] += 0.5*v1;
+          ev.v[2] += 0.5*v2;
+          ev.v[3] += 0.5*v3;
+          ev.v[4] += 0.5*v4;
+          ev.v[5] += 0.5*v5;
+      }
+
+      if (c.vflag_atom) {  // per-atom virial is written for atom i only
+        if (i < c.nlocal) {
+          c.d_vatom(i,0) += 0.5*v0;
+          c.d_vatom(i,1) += 0.5*v1;
+          c.d_vatom(i,2) += 0.5*v2;
+          c.d_vatom(i,3) += 0.5*v3;
+          c.d_vatom(i,4) += 0.5*v4;
+          c.d_vatom(i,5) += 0.5*v5;
+        }
+      }
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const device_type& dev) const {  // tally returned by compute_item is discarded
+    if (c.newton_pair) compute_item<0,1>(dev,list);
+    else compute_item<0,0>(dev,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const device_type& dev, value_type &energy_virial) const {  // tally is added to energy_virial
+    if (c.newton_pair)
+      energy_virial += compute_item<1,1>(dev,list);
+    else
+      energy_virial += compute_item<1,0>(dev,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(volatile value_type &update) {  // zeroes every field of update
+    update.evdwl = 0;
+    update.ecoul = 0;
+    update.v[0] = 0;
+    update.v[1] = 0;
+    update.v[2] = 0;
+    update.v[3] = 0;
+    update.v[4] = 0;
+    update.v[5] = 0;
+  }
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type &update,
+                   const volatile value_type &source) {  // adds source into update field by field
+    update.evdwl += source.evdwl;
+    update.ecoul += source.ecoul;
+    update.v[0] += source.v[0];
+    update.v[1] += source.v[1];
+    update.v[2] += source.v[2];
+    update.v[3] += source.v[3];
+    update.v[4] += source.v[4];
+    update.v[5] += source.v[5];
+  }
+
+
+};
+
+template <class PairStyle, bool STACKPARAMS, class Specialisation>
+struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation>  {  // specialisation for NEIGHFLAG == N2: loops over all atoms, no neighbor list lookup
+  typedef typename PairStyle::device_type device_type ;
+  typedef EV_FLOAT value_type;  // type accumulated by operator()(i,value_type&)
+
+  PairStyle c;                        // by-value copy of *c_ptr
+  NeighListKokkos<device_type> list;  // by-value copy of *list_ptr; not read by compute_item below
+
+  PairComputeFunctor(PairStyle* c_ptr,
+                          NeighListKokkos<device_type>* list_ptr):
+  c(*c_ptr),list(*list_ptr) {};
+  ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
+
+  KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
+    return j >> SBBITS & 3;  // two bits of j starting at bit SBBITS; indexes c.special_lj
+  }
+
+  // Force on atom ii from every other atom index in [0,c.nall) within the cutoff; NEWTON_PAIR is not referenced.
+  template<int EVFLAG, int NEWTON_PAIR>
+  KOKKOS_FUNCTION
+  EV_FLOAT compute_item(const int& ii,
+                        const NeighListKokkos<device_type> &list) const {
+    EV_FLOAT ev;
+    const int i = ii;//list.d_ilist[ii];
+    const X_FLOAT xtmp = c.x(i,0);
+    const X_FLOAT ytmp = c.x(i,1);
+    const X_FLOAT ztmp = c.x(i,2);
+    const int itype = c.type(i);
+
+    //const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
+    const int jnum = c.nall;
+
+    F_FLOAT fxtmp = 0.0;
+    F_FLOAT fytmp = 0.0;
+    F_FLOAT fztmp = 0.0;
+
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = jj;//neighbors_i(jj);
+      if(i==j) continue;
+      const F_FLOAT factor_lj = c.special_lj[sbmask(j)];  // j is a plain loop index here, not a neighbor-list entry
+      j &= NEIGHMASK;
+      const X_FLOAT delx = xtmp - c.x(j,0);
+      const X_FLOAT dely = ytmp - c.x(j,1);
+      const X_FLOAT delz = ztmp - c.x(j,2);
+      const int jtype = c.type(j);
+      const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+      if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {  // cutoff source chosen by STACKPARAMS
+
+        const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+
+        if (EVFLAG) {
+          if (c.eflag) {  // always weighted by 0.5 in this specialisation
+            ev.evdwl += 0.5*
+              factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+            if (c.COUL_FLAG)
+              ev.ecoul += 0.5*
+                factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
+          }
+
+          if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);  // NOTE(review): the eatom update in ev_tally is skipped when no virial flag is set - confirm
+        }
+      }
+    }
+
+    c.f(i,0) += fxtmp;
+    c.f(i,1) += fytmp;
+    c.f(i,2) += fztmp;
+
+    return ev;
+  }
+
+  KOKKOS_INLINE_FUNCTION
+    void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+      const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const
+  {
+    const int EFLAG = c.eflag;
+    const int VFLAG = c.vflag_either;
+
+    if (EFLAG) {
+      if (c.eflag_atom) {
+        const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);  // NOTE(review): ev holds the caller's running totals, not only pair (i,j) - confirm
+        if (i < c.nlocal) c.eatom[i] += epairhalf;
+        if (j < c.nlocal) c.eatom[j] += epairhalf;
+      }
+    }
+
+    if (VFLAG) {
+      const E_FLOAT v0 = delx*delx*fpair;
+      const E_FLOAT v1 = dely*dely*fpair;
+      const E_FLOAT v2 = delz*delz*fpair;
+      const E_FLOAT v3 = delx*dely*fpair;
+      const E_FLOAT v4 = delx*delz*fpair;
+      const E_FLOAT v5 = dely*delz*fpair;
+
+      if (c.vflag_global) {  // half of the pair virial per call
+          ev.v[0] += 0.5*v0;
+          ev.v[1] += 0.5*v1;
+          ev.v[2] += 0.5*v2;
+          ev.v[3] += 0.5*v3;
+          ev.v[4] += 0.5*v4;
+          ev.v[5] += 0.5*v5;
+      }
+
+      if (c.vflag_atom) {  // per-atom virial is written for atom i only
+        if (i < c.nlocal) {
+          c.d_vatom(i,0) += 0.5*v0;
+          c.d_vatom(i,1) += 0.5*v1;
+          c.d_vatom(i,2) += 0.5*v2;
+          c.d_vatom(i,3) += 0.5*v3;
+          c.d_vatom(i,4) += 0.5*v4;
+          c.d_vatom(i,5) += 0.5*v5;
+        }
+      }
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {  // tally returned by compute_item is discarded
+    compute_item<0,0>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i, value_type &energy_virial) const {  // tally is added to energy_virial
+    energy_virial += compute_item<1,0>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(volatile value_type &update) {  // zeroes every field of update
+    update.evdwl = 0;
+    update.ecoul = 0;
+    update.v[0] = 0;
+    update.v[1] = 0;
+    update.v[2] = 0;
+    update.v[3] = 0;
+    update.v[4] = 0;
+    update.v[5] = 0;
+  }
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type &update,
+                   const volatile value_type &source) {  // adds source into update field by field
+    update.evdwl += source.evdwl;
+    update.ecoul += source.ecoul;
+    update.v[0] += source.v[0];
+    update.v[1] += source.v[1];
+    update.v[2] += source.v[2];
+    update.v[3] += source.v[3];
+    update.v[4] += source.v[4];
+    update.v[5] += source.v[5];
+  }
+
+
+};
+
+template<class PairStyle, class Specialisation>
+EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
+  EV_FLOAT ev;
+
+  // A reduction into ev is launched only if energy or virial was requested;
+  // otherwise a plain parallel_for runs and ev is returned as constructed.
+  const bool reduce_ev = fpair->eflag || fpair->vflag;
+  const int neighflag = fpair->neighflag;
+
+  if (fpair->atom->ntypes <= MAX_TYPES_STACKPARAMS) {
+    // ntypes within MAX_TYPES_STACKPARAMS: functors use STACKPARAMS = true
+    if (neighflag == FULL) {
+      PairComputeFunctor<PairStyle,FULL,true,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == HALFTHREAD) {
+      PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == HALF) {
+      PairComputeFunctor<PairStyle,HALF,true,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == N2) {
+      // the N2 functor is launched over fpair->nlocal, not over list->inum
+      PairComputeFunctor<PairStyle,N2,true,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(fpair->nlocal,functor,ev);
+      else Kokkos::parallel_for(fpair->nlocal,functor);
+    } else if (neighflag == FULLCLUSTER) {
+      typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation > f_type;
+      f_type functor(fpair, list);
+      #ifdef KOKKOS_HAVE_CUDA
+        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+      #else
+        const int teamsize = 1;
+      #endif
+      // number of teams: inum*increment divided by teamsize, rounded up
+      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+      if (reduce_ev) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),functor,ev);
+      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),functor);
+    }
+  } else {
+    // ntypes above MAX_TYPES_STACKPARAMS: functors use STACKPARAMS = false
+    if (neighflag == FULL) {
+      PairComputeFunctor<PairStyle,FULL,false,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == HALFTHREAD) {
+      PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == HALF) {
+      PairComputeFunctor<PairStyle,HALF,false,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(list->inum,functor,ev);
+      else Kokkos::parallel_for(list->inum,functor);
+    } else if (neighflag == N2) {
+      PairComputeFunctor<PairStyle,N2,false,Specialisation > functor(fpair, list);
+      if (reduce_ev) Kokkos::parallel_reduce(fpair->nlocal,functor,ev);
+      else Kokkos::parallel_for(fpair->nlocal,functor);
+    } else if (neighflag == FULLCLUSTER) {
+      typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation > f_type;
+      f_type functor(fpair, list);
+      #ifdef KOKKOS_HAVE_CUDA
+        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+      #else
+        const int teamsize = 1;
+      #endif
+      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+      if (reduce_ev) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),functor,ev);
+      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),functor);
+    }
+  }
+  return ev;
+}
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp
new file mode 100644
index 0000000000..94576a36c7
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp
@@ -0,0 +1,267 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_lj_cut_kokkos.h"
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "respa.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define KOKKOS_CUDA_MAX_THREADS 256
+#define KOKKOS_CUDA_MIN_BLOCKS 8
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutKokkos<DeviceType>::PairLJCutKokkos(LAMMPS *lmp) : PairLJCut(lmp)
+{
+  // cutsq starts out NULL; allocate() creates it together with k_cutsq
+  cutsq = NULL;
+  respa_enable = 0;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCutKokkos<DeviceType>::~PairLJCutKokkos()
+{
+  // release only while the allocated flag is set (cleanup_copy() clears it)
+  if (!allocated) return;
+  k_cutsq = DAT::tdual_ffloat_2d();  // replace with a default-constructed dual view
+  memory->sfree(cutsq);
+  cutsq = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutKokkos<DeviceType>::cleanup_copy() {
+  // detach this copy from the arrays so that destroying it deallocates nothing
+  eatom = NULL;
+  vatom = NULL;
+  cutsq = NULL;
+  allocated = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  // Run the LJ pair kernel over the neighbor list and add the reduced
+  // energy/virial to the global accumulators.  eflag_in/vflag_in are
+  // stored in the eflag/vflag members and passed on to ev_setup().
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  // NOTE(review): presumably keeps the F dot r virial path off for full lists - confirm
+  if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  // sync the atom data named in datamask_read plus the coefficient views
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+
+  // mark outputs as modified: forces always, energy/virial only if tallied
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  // cache views and per-step scalars in members of this object
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+  for (int k = 0; k < 4; k++) special_lj[k] = force->special_lj[k];
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
+
+  // fence before the host consumes the results
+  DeviceType::fence();
+
+  // fold the reduced energy and virial into the global totals
+  if (eflag) eng_vdwl += ev.evdwl;
+  if (vflag_global) {
+    for (int k = 0; k < 6; k++) virial[k] += ev.v[k];
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  // r^-2 * r^-6 * (lj1*r^-6 - lj2); STACKPARAMS reads m_params, else the params view
+  const F_FLOAT inv_r2 = 1.0/rsq;
+  const F_FLOAT inv_r6 = inv_r2*inv_r2*inv_r2;
+  const F_FLOAT c1 = STACKPARAMS ? m_params[itype][jtype].lj1 : params(itype,jtype).lj1;
+  const F_FLOAT c2 = STACKPARAMS ? m_params[itype][jtype].lj2 : params(itype,jtype).lj2;
+  const F_FLOAT lj_term = inv_r6 * (c1*inv_r6 - c2);
+  return lj_term*inv_r2;
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCutKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  const F_FLOAT inv_r2 = 1.0/rsq;
+  const F_FLOAT inv_r6 = inv_r2*inv_r2*inv_r2;  // r^-6
+  const F_FLOAT c3 = STACKPARAMS ? m_params[itype][jtype].lj3 : params(itype,jtype).lj3;
+  const F_FLOAT c4 = STACKPARAMS ? m_params[itype][jtype].lj4 : params(itype,jtype).lj4;
+  return inv_r6*(c3*inv_r6 - c4) - (STACKPARAMS ? m_params[itype][jtype].offset : params(itype,jtype).offset);
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutKokkos<DeviceType>::allocate()
+{
+  PairLJCut::allocate();
+  // drop the cutsq left by the parent call and recreate it alongside k_cutsq
+  const int ntp1 = atom->ntypes + 1;
+  memory->destroy(cutsq);
+  memory->create_kokkos(k_cutsq,cutsq,ntp1,ntp1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJCut::params",ntp1,ntp1);
+  params = k_params.d_view;
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  // arg is handed to the parent with a count of 1, so arg[0] must exist
+  if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
+  PairLJCut::settings(1,arg);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCutKokkos<DeviceType>::init_style()
+{
+  PairLJCut::init_style();
+
+  // error if rRESPA with inner levels
+
+  if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
+    const bool has_inner = ((Respa *) update->integrate)->level_inner >= 0;
+    const bool has_middle = ((Respa *) update->integrate)->level_middle >= 0;
+    if (has_inner || has_middle)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // irequest = neigh request made by parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  const int irequest = neighbor->nrequest - 1;
+
+  // kokkos_host is set only when DeviceType is the host type and is not
+  // also the device type; kokkos_device whenever it is the device type
+
+  const bool is_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  const bool is_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
+  neighbor->requests[irequest]->kokkos_host = is_host && !is_device;
+  neighbor->requests[irequest]->kokkos_device = is_device;
+
+  // map the neighbor flag onto the request flags:
+  //   FULL              -> full
+  //   HALF, HALFTHREAD  -> half
+  //   N2                -> neither
+  //   FULLCLUSTER       -> full + full_cluster
+  //   anything else     -> error
+
+  const bool wants_cluster = (neighflag == FULLCLUSTER);
+  const bool wants_full = (neighflag == FULL) || wants_cluster;
+  const bool wants_half = (neighflag == HALF) || (neighflag == HALFTHREAD);
+
+  if (!wants_full && !wants_half && neighflag != N2) {
+    error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
+  } else {
+    neighbor->requests[irequest]->full = wants_full ? 1 : 0;
+    neighbor->requests[irequest]->half = wants_half ? 1 : 0;
+    neighbor->requests[irequest]->full_cluster = wants_cluster ? 1 : 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
+{
+  const double cut = PairLJCut::init_one(i,j);
+  const double cut_sq = cut*cut;
+  // host-side coefficients for (i,j), then mirrored into (j,i)
+  k_params.h_view(i,j).cutsq = cut_sq;
+  k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj1 = lj1[i][j];
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  k_cutsq.h_view(i,j) = cut_sq;
+  if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
+    m_params[j][i] = m_params[i][j] = k_params.h_view(i,j);
+    m_cutsq[i][j] = m_cutsq[j][i] = cut_sq;
+  }
+  k_params.template modify<LMPHostType>();
+  k_cutsq.template modify<LMPHostType>();
+  return cut;
+}
+
+
+// explicit instantiations; the host-type one is compiled only if DEVICE==2
+template class PairLJCutKokkos<LMPDeviceType>;
+#if DEVICE==2
+template class PairLJCutKokkos<LMPHostType>;
+#endif
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h
new file mode 100644
index 0000000000..5c3c002af5
--- /dev/null
+++ b/src/KOKKOS/pair_lj_cut_kokkos.h
@@ -0,0 +1,112 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/kk,PairLJCutKokkos<LMPDeviceType>)
+PairStyle(lj/cut/kk/device,PairLJCutKokkos<LMPDeviceType>)
+PairStyle(lj/cut/kk/host,PairLJCutKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_KOKKOS_H
+#define LMP_PAIR_LJ_CUT_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_cut.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+// Kokkos version of PairLJCut, templated on the Kokkos device type.
+template<class DeviceType>
+class PairLJCutKokkos : public PairLJCut {
+ public:
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  PairLJCutKokkos(class LAMMPS *);
+  ~PairLJCutKokkos();
+
+  void compute(int, int);
+
+  void settings(int, char **);
+  void init_style();
+  double init_one(int, int);
+  // LJ coefficients of one type pair; every field starts at zero
+  struct params_lj{
+    params_lj() : cutsq(0),lj1(0),lj2(0),lj3(0),lj4(0),offset(0) {}
+    params_lj(int) : cutsq(0),lj1(0),lj2(0),lj3(0),lj4(0),offset(0) {}  // argument is ignored
+    F_FLOAT cutsq,lj1,lj2,lj3,lj4,offset;
+  };
+
+ protected:
+  void cleanup_copy();
+  // per-pair evaluators; STACKPARAMS selects m_params over the params view
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
+  // COUL_FLAG is 0 for this style and the Coulomb energy is always 0
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+    return 0;
+  }
+
+  Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>::t_dev_const params;
+  params_lj m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];  // sized by MAX_TYPES_STACKPARAMS; filled in init_one()
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
+  typename ArrayTypes<DeviceType>::t_x_array c_x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+  //typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
+
+  int newton_pair;
+
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
+
+  class AtomKokkos *atomKK;
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  void allocate();
+  friend class PairComputeFunctor<PairLJCutKokkos,FULL,true>;
+  friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
+  friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
+  friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
+  friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
+  friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
+  friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
+  friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
+  friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
+  friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
+  friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp
new file mode 100644
index 0000000000..cc8072991a
--- /dev/null
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@@ -0,0 +1,1500 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "math.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_table_kokkos.h"
+#include "kokkos.h"
+#include "atom.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+enum{NONE,RLINEAR,RSQ,BMP};
+enum{FULL,HALFTHREAD,HALF};
+
+#define MAXLINE 1024
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairTableKokkos<DeviceType>::PairTableKokkos(LAMMPS *lmp) : Pair(lmp)
+{
+  tables = NULL;  // no tables yet
+  ntables = 0;
+  update_table = 0;
+  h_table = new TableHost();  // both containers are deleted in the destructor
+  d_table = new TableDevice();
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairTableKokkos<DeviceType>::~PairTableKokkos()
+{
+/*  for (int m = 0; m < ntables; m++) free_table(&tables[m]);
+  memory->sfree(tables);
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(tabindex);
+  }*/
+  delete h_table;
+  delete d_table;
+  // NOTE(review): cleanup of tables/setflag/cutsq/tabindex above is disabled - confirm they are freed elsewhere
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  // rebuild the Kokkos tables first whenever update_table is set
+  if (update_table) create_kokkos_tables();
+
+  // tabstyle is a run-time value while the kernel takes it as a template
+  // argument, so pick the matching compute_style instantiation here
+  // (an unrecognized tabstyle runs no kernel at all)
+  if (tabstyle == LOOKUP) compute_style<LOOKUP>(eflag_in,vflag_in);
+  else if (tabstyle == LINEAR) compute_style<LINEAR>(eflag_in,vflag_in);
+  else if (tabstyle == SPLINE) compute_style<SPLINE>(eflag_in,vflag_in);
+  else if (tabstyle == BITMAP) compute_style<BITMAP>(eflag_in,vflag_in);
+}
+
+template<class DeviceType>
+template<int TABSTYLE>
+void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
+{
+  // Run the tabulated pair kernel for table style TABSTYLE over the
+  // neighbor list and add the reduced energy/virial to the global
+  // accumulators.  eflag_in/vflag_in are stored in eflag/vflag and
+  // passed on to ev_setup().
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = vflag_fdotr = 0;
+
+  atomKK->sync(execution_space,datamask_read);
+  //k_cutsq.template sync<DeviceType>();
+  //k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  x = c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  for (int k = 0; k < 4; k++) special_lj[k] = force->special_lj[k];
+  newton_pair = force->newton_pair;
+  d_cutsq = d_table->cutsq;
+
+  // loop over neighbors of my atoms
+  // The functor is called ff in every branch so that it never hides the
+  // force view member f.  The bool template argument (presumably
+  // STACKPARAMS - confirm) is true only if ntypes <= MAX_TYPES_STACKPARAMS.
+
+  EV_FLOAT ev;
+  if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
+    if (neighflag == FULL) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == HALFTHREAD) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == HALF) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == N2) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,N2,false,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(nlocal,ff,ev);
+      else Kokkos::parallel_for(nlocal,ff);
+    } else if (neighflag == FULLCLUSTER) {
+      typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
+        f_type;
+      f_type ff(this,(NeighListKokkos<DeviceType>*) list);
+      #ifdef KOKKOS_HAVE_CUDA
+        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+      #else
+        const int teamsize = 1;
+      #endif
+      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+      if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
+      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
+    }
+  } else {
+    if (neighflag == FULL) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == HALFTHREAD) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == HALF) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
+      else Kokkos::parallel_for(list->inum,ff);
+    } else if (neighflag == N2) {
+      PairComputeFunctor<PairTableKokkos<DeviceType>,N2,true,S_TableCompute<DeviceType,TABSTYLE> >
+        ff(this,(NeighListKokkos<DeviceType>*) list);
+      if (eflag || vflag) Kokkos::parallel_reduce(nlocal,ff,ev);
+      else Kokkos::parallel_for(nlocal,ff);
+    } else if (neighflag == FULLCLUSTER) {
+      typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
+        f_type;
+      f_type ff(this,(NeighListKokkos<DeviceType>*) list);
+      #ifdef KOKKOS_HAVE_CUDA
+        const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
+      #else
+        const int teamsize = 1;
+      #endif
+      const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
+      if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
+      else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
+    }
+  }
+  // fence before the host consumes the results
+  DeviceType::fence();
+
+  if (eflag) eng_vdwl += ev.evdwl;
+  if (vflag_global) {
+    for (int k = 0; k < 6; k++) virial[k] += ev.v[k];
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairTableKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  // Force factor read from the table assigned to (itype,jtype) at squared
+  // distance rsq.  Specialisation::TabStyle picks the scheme:
+  //   LOOKUP - tabulated value of the bin that rsq falls into
+  //   LINEAR - linear interpolation within that bin
+  //   SPLINE - spline between that bin and the next one
+  //   other  - bin taken from the masked bit pattern of rsq, then linear
+  // rsq is not range-checked against the table here.
+  const int tidx = d_table_const.tabindex(itype,jtype);
+
+  if (Specialisation::TabStyle == LOOKUP) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    return d_table_const.f(tidx,bin);
+  }
+
+  if (Specialisation::TabStyle == LINEAR) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    const double frac = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
+    return d_table_const.f(tidx,bin) + frac*d_table_const.df(tidx,bin);
+  }
+
+  if (Specialisation::TabStyle == SPLINE) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    const double w_hi = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
+    const double w_lo = 1.0 - w_hi;
+    return w_lo * d_table_const.f(tidx,bin) + w_hi * d_table_const.f(tidx,bin+1) +
+      ((w_lo*w_lo*w_lo-w_lo)*d_table_const.f2(tidx,bin) + (w_hi*w_hi*w_hi-w_hi)*d_table_const.f2(tidx,bin+1)) *
+      d_table_const.deltasq6(tidx);
+  }
+
+  // any other TabStyle: index the table through the bits of rsq
+  union_int_float_t bits;
+  bits.f = rsq;
+  int bin = bits.i & d_table_const.nmask(tidx);
+  bin >>= d_table_const.nshiftbits(tidx);
+  const double frac = (bits.f - d_table_const.rsq(tidx,bin)) * d_table_const.drsq(tidx,bin);
+  return d_table_const.f(tidx,bin) + frac*d_table_const.df(tidx,bin);
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairTableKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  // Energy read from the table assigned to (itype,jtype) at squared
+  // distance rsq.  Specialisation::TabStyle picks the scheme:
+  //   LOOKUP - tabulated value of the bin that rsq falls into
+  //   LINEAR - linear interpolation within that bin
+  //   SPLINE - spline between that bin and the next one
+  //   other  - bin taken from the masked bit pattern of rsq, then linear
+  // rsq is not range-checked against the table here.
+  const int tidx = d_table_const.tabindex(itype,jtype);
+
+  if (Specialisation::TabStyle == LOOKUP) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    return d_table_const.e(tidx,bin);
+  }
+
+  if (Specialisation::TabStyle == LINEAR) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    const double frac = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
+    return d_table_const.e(tidx,bin) + frac*d_table_const.de(tidx,bin);
+  }
+
+  if (Specialisation::TabStyle == SPLINE) {
+    const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+    const double w_hi = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
+    const double w_lo = 1.0 - w_hi;
+    return w_lo * d_table_const.e(tidx,bin) + w_hi * d_table_const.e(tidx,bin+1) +
+        ((w_lo*w_lo*w_lo-w_lo)*d_table_const.e2(tidx,bin) + (w_hi*w_hi*w_hi-w_hi)*d_table_const.e2(tidx,bin+1)) *
+        d_table_const.deltasq6(tidx);
+  }
+
+  // any other TabStyle: index the table through the bits of rsq
+  union_int_float_t bits;
+  bits.f = rsq;
+  int bin = bits.i & d_table_const.nmask(tidx);
+  bin >>= d_table_const.nshiftbits(tidx);
+  const double frac = (bits.f - d_table_const.rsq(tidx,bin)) * d_table_const.drsq(tidx,bin);
+  return d_table_const.e(tidx,bin) + frac*d_table_const.de(tidx,bin);
+}
+
+/*
+template<class DeviceType>
+template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR,int TABSTYLE>
+KOKKOS_FUNCTION
+EV_FLOAT PairTableKokkos<DeviceType>::
+compute_item(const int &ii, const NeighListKokkos<DeviceType> &list) const
+{
+  EV_FLOAT ev;
+  const int tlm1 = tablength - 1;
+  union_int_float_t rsq_lookup;
+  const int i = list.d_ilist[ii];
+  const X_FLOAT xtmp = x(i,0);
+  const X_FLOAT ytmp = x(i,1);
+  const X_FLOAT ztmp = x(i,2);
+  const int itype = type(i);
+
+  const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
+  const int jnum = list.d_numneigh[i];
+
+  F_FLOAT fxtmp = 0.0;
+  F_FLOAT fytmp = 0.0;
+  F_FLOAT fztmp = 0.0;
+
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = neighbors_i(jj);
+    const F_FLOAT factor_lj = 1.0;  //special_lj[sbmask(j)];
+    j &= NEIGHMASK;
+    const X_FLOAT delx = xtmp - x(j,0);
+    const X_FLOAT dely = ytmp - x(j,1);
+    const X_FLOAT delz = ztmp - x(j,2);
+    const int jtype = type(j);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < d_table_const.cutsq(itype,jtype)) {
+        double fpair; 
+        const int tidx = d_table_const.tabindex(itype,jtype);
+        //const Table* const tb = &tables[tabindex[itype][jtype]];
+        
+        //if (rsq < d_table_const.innersq(tidx))
+        //  error->one(FLERR,"Pair distance < table inner cutoff");
+
+        if (TABSTYLE == LOOKUP) {
+          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+          //if (itable >= tlm1)
+          //  error->one(FLERR,"Pair distance > table outer cutoff");
+          fpair = factor_lj * d_table_const.f(tidx,itable);
+          if (EVFLAG)
+            ev.evdwl = d_table_const.e(tidx,itable);
+        } else if (TABSTYLE == LINEAR) {
+          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+          //if (itable >= tlm1)
+          //  error->one(FLERR,"Pair distance > table outer cutoff");
+          const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
+          const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);          
+          fpair = factor_lj * value;
+          if (EVFLAG)
+            ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
+        } else if (TABSTYLE == SPLINE) {
+          const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
+          //if (itable >= tlm1)
+          //  error->one(FLERR,"Pair distance > table outer cutoff");
+          const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
+          const double a = 1.0 - b;
+          const double value = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) +
+            ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) *
+            d_table_const.deltasq6(tidx);
+          fpair = factor_lj * value;
+          if (EVFLAG)
+            ev.evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) +
+              ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) *
+              d_table_const.deltasq6(tidx);
+        } else {
+          rsq_lookup.f = rsq;
+          int itable = rsq_lookup.i & d_table_const.nmask(tidx);
+          itable >>= d_table_const.nshiftbits(tidx);
+          const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable);
+          const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
+          fpair = factor_lj * value;
+          if (EVFLAG)
+            ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
+        }
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
+        Kokkos::atomic_fetch_add(&f(j,0),-delx*fpair);
+        Kokkos::atomic_fetch_add(&f(j,1),-dely*fpair);
+        Kokkos::atomic_fetch_add(&f(j,2),-delz*fpair);
+      }
+
+      if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < nlocal)) {
+        f(j,0) -= delx*fpair;
+        f(j,1) -= dely*fpair;
+        f(j,2) -= delz*fpair;
+      }
+
+      if(EVFLAG) {
+        if (eflag) {
+          ev.evdwl *= factor_lj;
+        }
+
+        if (evflag) ev_tally<NEIGHFLAG>(ev,i,j
+,fpair,delx,dely,delz);
+      }  
+    }
+  }
+
+  if (NEIGHFLAG == HALFTHREAD) {
+    Kokkos::atomic_fetch_add(&f(i,0),fxtmp);
+    Kokkos::atomic_fetch_add(&f(i,1),fytmp);
+    Kokkos::atomic_fetch_add(&f(i,2),fztmp);
+  } else {
+    f(i,0) += fxtmp;
+    f(i,1) += fytmp;
+    f(i,2) += fztmp;
+  }
+
+  return ev;
+}
+*/
+
+/* ----------------------------------------------------------------------
+   build the Kokkos copies of all tables
+   allocates the host/device views needed by the current tabstyle,
+     fills the host views from the tables[] array, copies them to the
+     device and hands the device views to d_table_const
+   also pushes cutsq and tabindex to the device
+   clears update_table when done
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::create_kokkos_tables()
+{
+  // one scalar entry per table
+
+  memory->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits");
+  memory->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask");
+  memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq");
+  memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
+  memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6");
+
+  // 2d views indexed by (table,bin), widths depend on the table style
+
+  const int nbins = tablength-1;
+
+  if (tabstyle == LOOKUP) {
+    memory->create_kokkos(d_table->e,h_table->e,ntables,nbins,"Table::e");
+    memory->create_kokkos(d_table->f,h_table->f,ntables,nbins,"Table::f");
+  }
+
+  if (tabstyle == LINEAR) {
+    memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
+    memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
+    memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
+    memory->create_kokkos(d_table->de,h_table->de,ntables,nbins,"Table::de");
+    memory->create_kokkos(d_table->df,h_table->df,ntables,nbins,"Table::df");
+  }
+
+  if (tabstyle == SPLINE) {
+    memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
+    memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
+    memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
+    memory->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2");
+    memory->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2");
+  }
+
+  if (tabstyle == BITMAP) {
+    const int nbitmap = 1 << tablength;
+    memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,nbitmap,"Table::rsq");
+    memory->create_kokkos(d_table->e,h_table->e,ntables,nbitmap,"Table::e");
+    memory->create_kokkos(d_table->f,h_table->f,ntables,nbitmap,"Table::f");
+    memory->create_kokkos(d_table->de,h_table->de,ntables,nbitmap,"Table::de");
+    memory->create_kokkos(d_table->df,h_table->df,ntables,nbitmap,"Table::df");
+    memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,nbitmap,"Table::drsq");
+  }
+
+  // fill the host views, one row per table
+  // each array is copied over the current second extent of its host view
+
+  for (int itab = 0; itab < ntables; itab++) {
+    const Table &src = tables[itab];
+
+    h_table->nshiftbits[itab] = src.nshiftbits;
+    h_table->nmask[itab] = src.nmask;
+    h_table->innersq[itab] = src.innersq;
+    h_table->invdelta[itab] = src.invdelta;
+    h_table->deltasq6[itab] = src.deltasq6;
+
+    const int n_rsq = h_table->rsq.dimension_1();
+    for (int k = 0; k < n_rsq; k++) h_table->rsq(itab,k) = src.rsq[k];
+
+    const int n_drsq = h_table->drsq.dimension_1();
+    for (int k = 0; k < n_drsq; k++) h_table->drsq(itab,k) = src.drsq[k];
+
+    const int n_e = h_table->e.dimension_1();
+    for (int k = 0; k < n_e; k++) h_table->e(itab,k) = src.e[k];
+
+    const int n_de = h_table->de.dimension_1();
+    for (int k = 0; k < n_de; k++) h_table->de(itab,k) = src.de[k];
+
+    const int n_f = h_table->f.dimension_1();
+    for (int k = 0; k < n_f; k++) h_table->f(itab,k) = src.f[k];
+
+    const int n_df = h_table->df.dimension_1();
+    for (int k = 0; k < n_df; k++) h_table->df(itab,k) = src.df[k];
+
+    const int n_e2 = h_table->e2.dimension_1();
+    for (int k = 0; k < n_e2; k++) h_table->e2(itab,k) = src.e2[k];
+
+    const int n_f2 = h_table->f2.dimension_1();
+    for (int k = 0; k < n_f2; k++) h_table->f2(itab,k) = src.f2[k];
+  }
+
+  // host -> device copy of each view, then hand the device view
+  //   to the struct of views stored in d_table_const
+
+  Kokkos::deep_copy(d_table->nshiftbits,h_table->nshiftbits);
+  d_table_const.nshiftbits = d_table->nshiftbits;
+
+  Kokkos::deep_copy(d_table->nmask,h_table->nmask);
+  d_table_const.nmask = d_table->nmask;
+
+  Kokkos::deep_copy(d_table->innersq,h_table->innersq);
+  d_table_const.innersq = d_table->innersq;
+
+  Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
+  d_table_const.invdelta = d_table->invdelta;
+
+  Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6);
+  d_table_const.deltasq6 = d_table->deltasq6;
+
+  Kokkos::deep_copy(d_table->rsq,h_table->rsq);
+  d_table_const.rsq = d_table->rsq;
+
+  Kokkos::deep_copy(d_table->drsq,h_table->drsq);
+  d_table_const.drsq = d_table->drsq;
+
+  Kokkos::deep_copy(d_table->e,h_table->e);
+  d_table_const.e = d_table->e;
+
+  Kokkos::deep_copy(d_table->de,h_table->de);
+  d_table_const.de = d_table->de;
+
+  Kokkos::deep_copy(d_table->f,h_table->f);
+  d_table_const.f = d_table->f;
+
+  Kokkos::deep_copy(d_table->df,h_table->df);
+  d_table_const.df = d_table->df;
+
+  Kokkos::deep_copy(d_table->e2,h_table->e2);
+  d_table_const.e2 = d_table->e2;
+
+  Kokkos::deep_copy(d_table->f2,h_table->f2);
+  d_table_const.f2 = d_table->f2;
+
+  // per-type-pair arrays, their views are created in allocate()
+
+  Kokkos::deep_copy(d_table->tabindex,h_table->tabindex);
+  Kokkos::deep_copy(d_table->cutsq,h_table->cutsq);
+
+  update_table = 0;
+}
+
+/* ----------------------------------------------------------------------
+   allocate the per-type-pair arrays, each (ntypes+1) x (ntypes+1)
+   setflag is created as a plain array
+   cutsq and tabindex are created via create_kokkos() together with
+     their host and device views, which are also stored in d_table_const
+   all three arrays are zeroed
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::allocate()
+{
+  allocated = 1;
+
+  const int ntp1 = atom->ntypes + 1;
+
+  memory->create(setflag,ntp1,ntp1,"pair:setflag");
+  memset(&setflag[0][0],0,ntp1*ntp1*sizeof(int));
+
+  memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,ntp1,ntp1,"pair:cutsq");
+  memset(&cutsq[0][0],0,ntp1*ntp1*sizeof(double));
+  d_table_const.cutsq = d_table->cutsq;
+
+  memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,ntp1,ntp1,"pair:tabindex");
+  memset(&tabindex[0][0],0,ntp1*ntp1*sizeof(int));
+  d_table_const.tabindex = d_table->tabindex;
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+   arg[0] = table style (lookup, linear, spline or bitmap)
+   arg[1] = number of table entries, must be at least 2
+   remaining args = optional long-range solver keywords
+   all previously stored tables and per-type arrays are discarded
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::settings(int narg, char **arg)
+{
+  if (narg < 2) error->all(FLERR,"Illegal pair_style command");
+
+  // table style
+
+  const char *style = arg[0];
+  if (!strcmp(style,"lookup")) tabstyle = LOOKUP;
+  else if (!strcmp(style,"linear")) tabstyle = LINEAR;
+  else if (!strcmp(style,"spline")) tabstyle = SPLINE;
+  else if (!strcmp(style,"bitmap")) tabstyle = BITMAP;
+  else error->all(FLERR,"Unknown table style in pair_style command");
+
+  // table length
+
+  tablength = force->inumeric(FLERR,arg[1]);
+  if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries");
+
+  // optional keywords
+  // each one asserts the tabulation is compatible with a long-range solver
+
+  for (int iarg = 2; iarg < narg; iarg++) {
+    const char *keyword = arg[iarg];
+    if (!strcmp(keyword,"ewald")) ewaldflag = 1;
+    else if (!strcmp(keyword,"pppm")) pppmflag = 1;
+    else if (!strcmp(keyword,"msm")) msmflag = 1;
+    else if (!strcmp(keyword,"dispersion")) dispersionflag = 1;
+    else if (!strcmp(keyword,"tip4p")) tip4pflag = 1;
+    else error->all(FLERR,"Illegal pair_style command");
+  }
+
+  // old tables cannot be reused with new settings, so drop them all
+
+  int m = 0;
+  while (m < ntables) {
+    free_table(&tables[m]);
+    m++;
+  }
+  memory->sfree(tables);
+  tables = NULL;
+  ntables = 0;
+
+  // drop the per-type arrays
+  // tabindex and cutsq are released by resetting their views to empty ones
+
+  if (allocated) {
+    memory->destroy(setflag);
+
+    h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d();
+    d_table_const.tabindex = d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d();
+
+    h_table->cutsq = typename ArrayTypes<LMPHostType>::t_ffloat_2d();
+    d_table_const.cutsq = d_table->cutsq = typename ArrayTypes<DeviceType>::t_ffloat_2d();
+  }
+  allocated = 0;
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   arg[0],arg[1] = type ranges, arg[2] = table file, arg[3] = keyword of
+     the section to read, optional arg[4] = cutoff
+   appends one new Table to tables[] and points tabindex[i][j] at it
+     for every i,j in the given ranges with j >= i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  if (!(narg == 4 || narg == 5)) error->all(FLERR,"Illegal pair_coeff command");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  // grow the table list by one entry
+  // proc 0 reads the section from file, then it is broadcast to all procs
+
+  int rank;
+  MPI_Comm_rank(world,&rank);
+  tables = (Table *)
+    memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables");
+  Table *tb = tables + ntables;
+  null_table(tb);
+  if (rank == 0) read_table(tb,arg[2],arg[3]);
+  bcast_table(tb);
+
+  // table cutoff: explicit arg, else upper bound of the table
+
+  const int last = tb->ninput-1;
+  if (narg == 5) tb->cut = force->numeric(FLERR,arg[4]);
+  else tb->cut = tb->rflag ? tb->rhi : tb->rfile[last];
+
+  // sanity checks on table parameters
+  // cutoff must lie within the table
+  // for BITMAP tables, file values can be in non-ascending order
+
+  if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length");
+
+  const double rlo = (tb->rflag == 0) ? tb->rfile[0] : tb->rlo;
+  const double rhi = (tb->rflag == 0) ? tb->rfile[last] : tb->rhi;
+
+  if (tb->cut <= rlo || tb->cut > rhi)
+    error->all(FLERR,"Invalid pair table cutoff");
+  if (rlo <= 0.0) error->all(FLERR,"Invalid pair table cutoff");
+
+  // match = 1 when the file values can be used as-is, i.e. the r values
+  //   needed by the final table coincide with those read from file
+  // tabstyle SPLINE never matches, spline tables are always built
+
+  tb->match = 0;
+  if (tabstyle == LINEAR) {
+    if (tb->ninput == tablength && tb->rflag == RSQ && tb->rhi == tb->cut)
+      tb->match = 1;
+  }
+  if (tabstyle == BITMAP) {
+    if (tb->ninput == 1 << tablength && tb->rflag == BMP &&
+        tb->rhi == tb->cut) tb->match = 1;
+  }
+  if (tb->rflag == BMP && tb->match == 0)
+    error->all(FLERR,"Bitmapped table in file does not match requested table");
+
+  // spline the file values if needed, then build the r,e,f vectors
+
+  if (tb->match == 0) spline_table(tb);
+  compute_table(tb);
+
+  // record the index of the new table for each affected type pair
+
+  int npairs = 0;
+  for (int itype = ilo; itype <= ihi; itype++) {
+    const int jstart = MAX(jlo,itype);
+    for (int jtype = jstart; jtype <= jhi; jtype++) {
+      tabindex[itype][jtype] = ntables;
+      setflag[itype][jtype] = 1;
+      npairs++;
+    }
+  }
+
+  if (npairs == 0) error->all(FLERR,"Illegal pair_coeff command");
+  ntables++;
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   mirrors tabindex[i][j] into tabindex[j][i]
+   stores the squared cutoff in m_cutsq when both types fit in it
+   returns the cutoff of the table assigned to the pair
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairTableKokkos<DeviceType>::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+
+  const int itable = tabindex[i][j];
+  tabindex[j][i] = itable;
+
+  const double cut = tables[itable].cut;
+
+  if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_cutsq[i][j] = cut*cut;
+    m_cutsq[j][i] = m_cutsq[i][j];
+  }
+
+  return cut;
+}
+
+/* ----------------------------------------------------------------------
+   read a table section from a tabulated potential file
+   only called by proc 0
+   this function sets these values in Table:
+     ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits
+   file = name of the potential file
+   keyword = first word of the section to read
+   errors out if the file cannot be opened, the keyword is not found,
+     or the file ends before a section is complete
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::read_table(Table *tb, char *file, char *keyword)
+{
+  char line[MAXLINE];
+  const char *eofmsg = "Premature end of file in pair table";
+
+  // open file
+  // the message is written with a size bound so that a long file name
+  //   cannot overflow the buffer
+
+  FILE *fp = force->open_potential(file);
+  if (fp == NULL) {
+    char str[MAXLINE];
+    snprintf(str,sizeof(str),"Cannot open file %s",file);
+    error->one(FLERR,str);
+  }
+
+  // loop until section found with matching keyword
+  // a non-matching section is skipped: parameter line, one more line,
+  //   then ninput data lines
+  // every read is checked so a truncated file is reported instead of
+  //   silently re-parsing the previous line
+
+  while (1) {
+    if (fgets(line,MAXLINE,fp) == NULL)
+      error->one(FLERR,"Did not find keyword in table file");
+    if (strspn(line," \t\n\r") == strlen(line)) continue;  // blank line
+    if (line[0] == '#') continue;                          // comment
+    char *word = strtok(line," \t\n\r");
+    if (strcmp(word,keyword) == 0) break;           // matching keyword
+    if (fgets(line,MAXLINE,fp) == NULL)             // no match, skip section
+      error->one(FLERR,eofmsg);
+    param_extract(tb,line);
+    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,eofmsg);
+    for (int i = 0; i < tb->ninput; i++)
+      if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,eofmsg);
+  }
+
+  // read args on 2nd line of section
+  // allocate table arrays for file values
+
+  if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,eofmsg);
+  param_extract(tb,line);
+  memory->create(tb->rfile,tb->ninput,"pair:rfile");
+  memory->create(tb->efile,tb->ninput,"pair:efile");
+  memory->create(tb->ffile,tb->ninput,"pair:ffile");
+
+  // setup bitmap parameters for table to read in
+  // the masks are only meaningful (and only used) for rflag = BMP,
+  //   they are zeroed so they never hold indeterminate values
+
+  tb->ntablebits = 0;
+  int masklo = 0,maskhi = 0,nmask = 0,nshiftbits = 0;
+  if (tb->rflag == BMP) {
+    while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++;
+    if (1 << tb->ntablebits != tb->ninput)
+      error->one(FLERR,"Bitmapped table is incorrect length in table file");
+    init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits);
+  }
+
+  // read r,e,f table values from file
+  // if rflag set, compute r
+  // if rflag not set, use r from file
+
+  int itmp;
+  double rtmp;
+  union_int_float_t rsq_lookup;
+
+  // one line separates the parameter line from the data lines
+
+  if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,eofmsg);
+  for (int i = 0; i < tb->ninput; i++) {
+    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,eofmsg);
+    sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]);
+
+    if (tb->rflag == RLINEAR)
+      rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1);
+    else if (tb->rflag == RSQ) {
+      rtmp = tb->rlo*tb->rlo +
+        (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1);
+      rtmp = sqrt(rtmp);
+    } else if (tb->rflag == BMP) {
+      // rebuild rsq from the bin index, use the high mask if the low
+      //   mask gives a value below rlo*rlo
+      rsq_lookup.i = i << nshiftbits;
+      rsq_lookup.i |= masklo;
+      if (rsq_lookup.f < tb->rlo*tb->rlo) {
+        rsq_lookup.i = i << nshiftbits;
+        rsq_lookup.i |= maskhi;
+      }
+      rtmp = sqrtf(rsq_lookup.f);
+    }
+
+    tb->rfile[i] = rtmp;
+  }
+
+  // close file
+
+  fclose(fp);
+}
+
+/* ----------------------------------------------------------------------
+   broadcast read-in table info from proc 0 to other procs
+   this function communicates these values in Table:
+     ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi
+   procs other than 0 allocate rfile,efile,ffile here
+   rlo,rhi are only sent if rflag is set, fplo,fphi only if fpflag is set
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::bcast_table(Table *tb)
+{
+  // table length first, receivers need it to size their arrays
+
+  MPI_Bcast(&tb->ninput,1,MPI_INT,0,world);
+  const int nvalues = tb->ninput;
+
+  int rank;
+  MPI_Comm_rank(world,&rank);
+  if (rank != 0) {
+    memory->create(tb->rfile,nvalues,"pair:rfile");
+    memory->create(tb->efile,nvalues,"pair:efile");
+    memory->create(tb->ffile,nvalues,"pair:ffile");
+  }
+
+  // file values
+
+  MPI_Bcast(tb->rfile,nvalues,MPI_DOUBLE,0,world);
+  MPI_Bcast(tb->efile,nvalues,MPI_DOUBLE,0,world);
+  MPI_Bcast(tb->ffile,nvalues,MPI_DOUBLE,0,world);
+
+  // optional r range
+
+  MPI_Bcast(&tb->rflag,1,MPI_INT,0,world);
+  if (tb->rflag != 0) {
+    MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world);
+    MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world);
+  }
+
+  // optional end-point derivatives
+
+  MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world);
+  if (tb->fpflag != 0) {
+    MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world);
+    MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   build spline representation of e,f over entire range of read-in table
+   this function sets these values in Table: e2file,f2file
+   if fpflag is not set, fplo,fphi are also set here from the
+     first and last pair of file values
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::spline_table(Table *tb)
+{
+  const int n = tb->ninput;
+  double *r = tb->rfile;
+  double *fval = tb->ffile;
+
+  memory->create(tb->e2file,n,"pair:e2file");
+  memory->create(tb->f2file,n,"pair:f2file");
+
+  // energy spline, end-point derivatives are the negated file forces
+
+  const double ep0 = - fval[0];
+  const double epn = - fval[n-1];
+  spline(r,tb->efile,n,ep0,epn,tb->e2file);
+
+  // force spline
+  // without FP values in the file, use the slope of the
+  //   first and last interval as end-point derivatives
+
+  if (tb->fpflag == 0) {
+    tb->fplo = (fval[1] - fval[0]) / (r[1] - r[0]);
+    tb->fphi = (fval[n-1] - fval[n-2]) /
+      (r[n-1] - r[n-2]);
+  }
+
+  spline(r,fval,n,tb->fplo,tb->fphi,tb->f2file);
+}
+
+/* ----------------------------------------------------------------------
+   extract attributes from parameter line in table section
+   format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi
+   N is required, other params are optional
+   line is tokenized in place with strtok(), so its contents are modified
+   sets ninput,rflag,fpflag and, if present, rlo,rhi and fplo,fphi
+   errors out on an unknown keyword, on a keyword that is not followed
+     by all of its values, or if N was not set
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line)
+{
+  const char *sep = " \t\n\r\f";
+  const char *missing = "Missing value in pair table parameters";
+
+  tb->ninput = 0;
+  tb->rflag = NONE;
+  tb->fpflag = 0;
+
+  // each value token is checked before conversion, since strtok()
+  //   returns NULL when the line ends right after a keyword
+
+  char *word = strtok(line,sep);
+  while (word) {
+    if (strcmp(word,"N") == 0) {
+      word = strtok(NULL,sep);
+      if (word == NULL) error->one(FLERR,missing);
+      tb->ninput = atoi(word);
+    } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 ||
+               strcmp(word,"BITMAP") == 0) {
+      if (strcmp(word,"R") == 0) tb->rflag = RLINEAR;
+      else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ;
+      else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP;
+      word = strtok(NULL,sep);
+      if (word == NULL) error->one(FLERR,missing);
+      tb->rlo = atof(word);
+      word = strtok(NULL,sep);
+      if (word == NULL) error->one(FLERR,missing);
+      tb->rhi = atof(word);
+    } else if (strcmp(word,"FP") == 0) {
+      tb->fpflag = 1;
+      word = strtok(NULL,sep);
+      if (word == NULL) error->one(FLERR,missing);
+      tb->fplo = atof(word);
+      word = strtok(NULL,sep);
+      if (word == NULL) error->one(FLERR,missing);
+      tb->fphi = atof(word);
+    } else {
+      printf("WORD: %s\n",word);
+      error->one(FLERR,"Invalid keyword in pair table parameters");
+    }
+    word = strtok(NULL,sep);
+  }
+
+  if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N");
+}
+
+/* ----------------------------------------------------------------------
+   compute r,e,f vectors from splined values
+   sets update_table = 1
+   sets tb->innersq,delta,invdelta for every table style, then allocates
+     and fills the arrays of the current tabstyle:
+       LOOKUP: e,f
+       LINEAR: rsq,e,f,de,df
+       SPLINE: rsq,e,f,e2,f2 and deltasq6
+       BITMAP: rsq,e,f,de,df,drsq and nmask,nshiftbits, innersq is reset
+   reads tb->cut,rflag,rlo,match,fpflag,fplo,fphi and the file arrays
+   when tb->match is 0, values are interpolated with splint() from
+     rfile,efile,ffile and e2file,f2file
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::compute_table(Table *tb)
+{
+  update_table = 1;  // set back to 0 at the end of create_kokkos_tables()
+  int tlm1 = tablength-1;  // number of bins for LOOKUP, LINEAR, SPLINE
+
+  // inner = inner table bound
+  // cut = outer table bound
+  // delta = table spacing in rsq for N-1 bins
+
+  double inner;
+  if (tb->rflag) inner = tb->rlo;
+  else inner = tb->rfile[0];
+  tb->innersq = inner*inner;
+  tb->delta = (tb->cut*tb->cut - tb->innersq) / tlm1;
+  tb->invdelta = 1.0/tb->delta;
+
+  // direct lookup tables
+  // N-1 evenly spaced bins in rsq from inner to cut
+  // e,f = value at midpt of bin
+  // e,f are N-1 in length since store 1 value at bin midpt
+  // f is converted to f/r when stored in f[i]
+  // e,f are never a match to read-in values, always computed via spline interp
+
+  if (tabstyle == LOOKUP) {
+    memory->create(tb->e,tlm1,"pair:e");
+    memory->create(tb->f,tlm1,"pair:f");
+
+    double r,rsq;
+    for (int i = 0; i < tlm1; i++) {
+      rsq = tb->innersq + (i+0.5)*tb->delta;
+      r = sqrt(rsq);
+      tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
+      tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
+    }
+  }
+
+  // linear tables
+  // N-1 evenly spaced bins in rsq from inner to cut
+  // rsq,e,f = value at lower edge of bin
+  // de,df values = delta from lower edge to upper edge of bin
+  // rsq,e,f are N in length so de,df arrays can compute difference
+  // f is converted to f/r when stored in f[i]
+  // e,f can match read-in values, else compute via spline interp
+
+  if (tabstyle == LINEAR) {
+    memory->create(tb->rsq,tablength,"pair:rsq");
+    memory->create(tb->e,tablength,"pair:e");
+    memory->create(tb->f,tablength,"pair:f");
+    memory->create(tb->de,tlm1,"pair:de");
+    memory->create(tb->df,tlm1,"pair:df");
+
+    double r,rsq;
+    for (int i = 0; i < tablength; i++) {
+      rsq = tb->innersq + i*tb->delta;
+      r = sqrt(rsq);
+      tb->rsq[i] = rsq;
+      if (tb->match) {
+        tb->e[i] = tb->efile[i];
+        tb->f[i] = tb->ffile[i]/r;
+      } else {
+        tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
+        tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
+      }
+    }
+
+    for (int i = 0; i < tlm1; i++) {
+      tb->de[i] = tb->e[i+1] - tb->e[i];
+      tb->df[i] = tb->f[i+1] - tb->f[i];
+    }
+  }
+
+  // cubic spline tables
+  // N-1 evenly spaced bins in rsq from inner to cut
+  // rsq,e,f = value at lower edge of bin
+  // e2,f2 = spline coefficient for each bin
+  // rsq,e,f,e2,f2 are N in length so have N-1 spline bins
+  // f is converted to f/r after e is splined
+  // e,f can match read-in values, else compute via spline interp
+
+  if (tabstyle == SPLINE) {
+    memory->create(tb->rsq,tablength,"pair:rsq");
+    memory->create(tb->e,tablength,"pair:e");
+    memory->create(tb->f,tablength,"pair:f");
+    memory->create(tb->e2,tablength,"pair:e2");
+    memory->create(tb->f2,tablength,"pair:f2");
+
+    tb->deltasq6 = tb->delta*tb->delta / 6.0;
+
+    double r,rsq;
+    for (int i = 0; i < tablength; i++) {
+      rsq = tb->innersq + i*tb->delta;
+      r = sqrt(rsq);
+      tb->rsq[i] = rsq;
+      if (tb->match) {
+        tb->e[i] = tb->efile[i];
+        tb->f[i] = tb->ffile[i]/r;
+      } else {
+        tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
+        tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r);
+      }
+    }
+
+    // ep0,epn = dh/dg at inner and at cut
+    // h(r) = e(r) and g(r) = r^2
+    // dh/dg = (de/dr) / 2r = -f/2r
+
+    double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq));
+    double epn = - tb->f[tlm1] / (2.0 * tb->cut);
+    spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2);
+
+    // fp0,fpn = dh/dg at inner and at cut
+    // h(r) = f(r)/r and g(r) = r^2
+    // dh/dg = (1/r df/dr - f/r^2) / 2r
+    // dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1))
+
+    double fp0,fpn;
+    double secant_factor = 0.1;  // secant step as a fraction of delta
+    if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) /
+      (2.0 * sqrt(tb->innersq));
+    else {
+      double rsq1 = tb->innersq;
+      double rsq2 = rsq1 + secant_factor*tb->delta;
+      fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) /
+             sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta);
+    }
+
+    if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn =
+      (tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut);
+    else {
+      double rsq2 = tb->cut * tb->cut;
+      double rsq1 = rsq2 - secant_factor*tb->delta;
+      fpn = (tb->f[tlm1] / sqrt(rsq2) -
+             splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) /
+             sqrt(rsq1)) / (secant_factor*tb->delta);
+    }
+
+    for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]);
+    spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2);
+  }
+
+  // bitmapped linear tables
+  // 2^N bins from inner to cut, spaced in bitmapped manner
+  // f is converted to f/r when stored in f[i]
+  // e,f can match read-in values, else compute via spline interp
+
+  if (tabstyle == BITMAP) {
+    double r;
+    union_int_float_t rsq_lookup;
+    int masklo,maskhi;
+
+    // linear lookup tables of length ntable = 2^n
+    // stored value = value at lower edge of bin
+
+    init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits);
+    int ntable = 1 << tablength;
+    int ntablem1 = ntable - 1;
+
+    memory->create(tb->rsq,ntable,"pair:rsq");
+    memory->create(tb->e,ntable,"pair:e");
+    memory->create(tb->f,ntable,"pair:f");
+    memory->create(tb->de,ntable,"pair:de");
+    memory->create(tb->df,ntable,"pair:df");
+    memory->create(tb->drsq,ntable,"pair:drsq");
+
+    union_int_float_t minrsq_lookup;
+    minrsq_lookup.i = 0 << tb->nshiftbits;
+    minrsq_lookup.i |= maskhi;
+
+    for (int i = 0; i < ntable; i++) {
+      rsq_lookup.i = i << tb->nshiftbits;
+      rsq_lookup.i |= masklo;
+      if (rsq_lookup.f < tb->innersq) {
+        rsq_lookup.i = i << tb->nshiftbits;
+        rsq_lookup.i |= maskhi;
+      }
+      r = sqrtf(rsq_lookup.f);
+      tb->rsq[i] = rsq_lookup.f;
+      if (tb->match) {
+        tb->e[i] = tb->efile[i];
+        tb->f[i] = tb->ffile[i]/r;
+      } else {
+        tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
+        tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
+      }
+      minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
+    }
+
+    tb->innersq = minrsq_lookup.f;  // smallest rsq seen while filling the table
+
+    for (int i = 0; i < ntablem1; i++) {
+      tb->de[i] = tb->e[i+1] - tb->e[i];
+      tb->df[i] = tb->f[i+1] - tb->f[i];
+      tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]);
+    }
+
+    // get the delta values for the last table entries
+    // tables are connected periodically between 0 and ntablem1
+
+    tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1];
+    tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1];
+    tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]);
+
+    // get the correct delta values at itablemax
+    // smallest r is in bin itablemin
+    // largest r is in bin itablemax, which is itablemin-1,
+    //   or ntablem1 if itablemin=0
+
+    // deltas at itablemax only needed if corresponding rsq < cut*cut
+    // if so, compute deltas between rsq and cut*cut
+    //   if tb->match, data at cut*cut is unavailable, so we'll take
+    //   deltas at itablemax-1 as a good approximation
+
+    double e_tmp,f_tmp;
+    int itablemin = minrsq_lookup.i & tb->nmask;
+    itablemin >>= tb->nshiftbits;
+    int itablemax = itablemin - 1;
+    if (itablemin == 0) itablemax = ntablem1;
+    int itablemaxm1 = itablemax - 1;
+    if (itablemax == 0) itablemaxm1 = ntablem1;
+    rsq_lookup.i = itablemax << tb->nshiftbits;
+    rsq_lookup.i |= maskhi;
+    if (rsq_lookup.f < tb->cut*tb->cut) {
+      if (tb->match) {
+        tb->de[itablemax] = tb->de[itablemaxm1];
+        tb->df[itablemax] = tb->df[itablemaxm1];
+        tb->drsq[itablemax] = tb->drsq[itablemaxm1];
+      } else {
+            rsq_lookup.f = tb->cut*tb->cut;
+        r = sqrtf(rsq_lookup.f);
+        e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
+        f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
+        tb->de[itablemax] = e_tmp - tb->e[itablemax];
+        tb->df[itablemax] = f_tmp - tb->f[itablemax];
+        tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set all ptrs in a table to NULL, so can be freed safely
+   covers the five file arrays and the eight computed arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::null_table(Table *tb)
+{
+  // values read from file and their spline coefficients
+
+  tb->rfile = NULL;
+  tb->efile = NULL;
+  tb->ffile = NULL;
+  tb->e2file = NULL;
+  tb->f2file = NULL;
+
+  // computed table arrays
+
+  tb->rsq = NULL;
+  tb->drsq = NULL;
+  tb->e = NULL;
+  tb->de = NULL;
+  tb->f = NULL;
+  tb->df = NULL;
+  tb->e2 = NULL;
+  tb->f2 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays in a table
+   releases the five file arrays and the eight computed arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::free_table(Table *tb)
+{
+  // addresses of every array pointer owned by the table,
+  //   file arrays first, then the computed arrays
+
+  double **owned[] = {
+    &tb->rfile, &tb->efile, &tb->ffile, &tb->e2file, &tb->f2file,
+    &tb->rsq, &tb->drsq, &tb->e, &tb->de,
+    &tb->f, &tb->df, &tb->e2, &tb->f2
+  };
+  const int nowned = sizeof(owned) / sizeof(owned[0]);
+
+  for (int k = 0; k < nowned; k++) memory->destroy(*owned[k]);
+}
+
+/* ----------------------------------------------------------------------
+   spline and splint routines modified from Numerical Recipes
+   spline() fills y2[0..n-1] from the n points x[],y[]
+   yp1,ypn = end-point derivatives at x[0] and x[n-1]
+     a value above 0.99e30 selects the branch that does not use it
+   y2 is the array that splint() takes as y2a
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::spline(double *x, double *y, int n,
+                       double yp1, double ypn, double *y2)
+{
+  // scratch array, released before returning
+
+  double *u = new double[n];
+
+  // condition at the first point
+
+  if (yp1 > 0.99e30) {
+    u[0] = 0.0;
+    y2[0] = u[0];
+  } else {
+    y2[0] = -0.5;
+    u[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1);
+  }
+
+  // forward sweep over the interior points
+
+  for (int j = 1; j < n-1; j++) {
+    const double sig = (x[j]-x[j-1]) / (x[j+1]-x[j-1]);
+    const double p = sig*y2[j-1] + 2.0;
+    y2[j] = (sig-1.0) / p;
+    u[j] = (y[j+1]-y[j]) / (x[j+1]-x[j]) - (y[j]-y[j-1]) / (x[j]-x[j-1]);
+    u[j] = (6.0*u[j] / (x[j+1]-x[j-1]) - sig*u[j-1]) / p;
+  }
+
+  // condition at the last point
+
+  double qn,un;
+  if (ypn > 0.99e30) {
+    un = 0.0;
+    qn = un;
+  } else {
+    qn = 0.5;
+    un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2]));
+  }
+  y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2] + 1.0);
+
+  // backward sweep
+
+  int k = n-2;
+  while (k >= 0) {
+    y2[k] = y2[k]*y2[k+1] + u[k];
+    k--;
+  }
+
+  delete [] u;
+}
+
+/* ----------------------------------------------------------------------
+   evaluate the spline through xa[],ya[] with coefficients y2a[] at x
+   n = number of points
+   the bracketing interval is found by bisection on xa[]
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairTableKokkos<DeviceType>::splint(double *xa, double *ya, double *y2a, int n, double x)
+{
+  // bisect until lo and hi are adjacent indices
+
+  int lo = 0;
+  int hi = n-1;
+  while (hi-lo > 1) {
+    const int mid = (hi+lo) >> 1;
+    if (xa[mid] > x) hi = mid;
+    else lo = mid;
+  }
+
+  // cubic interpolation inside the interval
+
+  const double h = xa[hi]-xa[lo];
+  const double a = (xa[hi]-x) / h;
+  const double b = (x-xa[lo]) / h;
+  return a*ya[lo] + b*ya[hi] +
+    ((a*a*a-a)*y2a[lo] + (b*b*b-b)*y2a[hi]) * (h*h)/6.0;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   only the global settings are written, see write_restart_settings()
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::write_restart(FILE *fp)
+{
+  this->write_restart_settings(fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   restores the global settings, then allocates the per-type arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::read_restart(FILE *fp)
+{
+  this->read_restart_settings(fp);
+  this->allocate();
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+   writes seven int-sized values in this order:
+     tabstyle,tablength,ewaldflag,pppmflag,msmflag,dispersionflag,tip4pflag
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::write_restart_settings(FILE *fp)
+{
+  // order must match read_restart_settings()
+
+  const void *fields[] = {
+    &tabstyle, &tablength, &ewaldflag, &pppmflag,
+    &msmflag, &dispersionflag, &tip4pflag
+  };
+  const int nfields = sizeof(fields) / sizeof(fields[0]);
+
+  for (int k = 0; k < nfields; k++) fwrite(fields[k],sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+   reads seven int-sized values in this order:
+     tabstyle,tablength,ewaldflag,pppmflag,msmflag,dispersionflag,tip4pflag
+   each value is then broadcast from proc 0 as an MPI_INT
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::read_restart_settings(FILE *fp)
+{
+  // order must match write_restart_settings()
+
+  void *fields[] = {
+    &tabstyle, &tablength, &ewaldflag, &pppmflag,
+    &msmflag, &dispersionflag, &tip4pflag
+  };
+  const int nfields = sizeof(fields) / sizeof(fields[0]);
+
+  if (comm->me == 0) {
+    for (int k = 0; k < nfields; k++) fread(fields[k],sizeof(int),1,fp);
+  }
+
+  for (int k = 0; k < nfields; k++) MPI_Bcast(fields[k],1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   energy and force for one pair of types itype,jtype at distance rsq
+   fforce is set to factor_lj times the tabulated force value
+   returns factor_lj times the tabulated energy
+   i,j and factor_coul are not used
+   errors out if rsq is below the table inner cutoff or, for the
+     LOOKUP, LINEAR and SPLINE styles, beyond the last bin
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairTableKokkos<DeviceType>::single(int i, int j, int itype, int jtype, double rsq,
+                         double factor_coul, double factor_lj,
+                         double &fforce)
+{
+  const int nbins = tablength - 1;
+
+  Table *tb = &tables[tabindex[itype][jtype]];
+  if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff");
+
+  // fval,phi = unscaled force and energy from the table
+
+  double fval,phi;
+
+  if (tabstyle == LOOKUP) {
+    const int itable = static_cast<int> ((rsq-tb->innersq) * tb->invdelta);
+    if (itable >= nbins) error->one(FLERR,"Pair distance > table outer cutoff");
+    fval = tb->f[itable];
+    phi = tb->e[itable];
+
+  } else if (tabstyle == LINEAR) {
+    const int itable = static_cast<int> ((rsq-tb->innersq) * tb->invdelta);
+    if (itable >= nbins) error->one(FLERR,"Pair distance > table outer cutoff");
+    const double fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
+    fval = tb->f[itable] + fraction*tb->df[itable];
+    phi = tb->e[itable] + fraction*tb->de[itable];
+
+  } else if (tabstyle == SPLINE) {
+    const int itable = static_cast<int> ((rsq-tb->innersq) * tb->invdelta);
+    if (itable >= nbins) error->one(FLERR,"Pair distance > table outer cutoff");
+    const double b = (rsq - tb->rsq[itable]) * tb->invdelta;
+    const double a = 1.0 - b;
+    fval = a * tb->f[itable] + b * tb->f[itable+1] +
+      ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) *
+      tb->deltasq6;
+    phi = a * tb->e[itable] + b * tb->e[itable+1] +
+      ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6;
+
+  } else {
+    // bitmapped table: the bin index is taken from the bits of rsq
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    int itable = rsq_lookup.i & tb->nmask;
+    itable >>= tb->nshiftbits;
+    const double fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
+    fval = tb->f[itable] + fraction*tb->df[itable];
+    phi = tb->e[itable] + fraction*tb->de[itable];
+  }
+
+  fforce = factor_lj * fval;
+  return factor_lj*phi;
+}
+
+/* ----------------------------------------------------------------------
+   return the Coulomb cutoff for tabled potentials
+   called by KSpace solvers which require that all pairwise cutoffs be the same
+   loop over all tables not just those indexed by tabindex[i][j] since
+     no way to know which tables are active since pair::init() not yet called
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void *PairTableKokkos<DeviceType>::extract(const char *str, int &dim)
+{
+  if (strcmp(str,"cut_coul")) return NULL;
+  if (!ntables) error->all(FLERR,"All pair coeffs are not set");
+  // every table must share the cutoff of table 0, whose address is returned
+  double *shared_cut = &tables[0].cut;
+  for (int itab = 1; itab < ntables; ++itab) {
+    if (tables[itab].cut == *shared_cut) continue;
+    error->all(FLERR,"Pair table cutoffs must all be equal to use with KSpace");
+  }
+  dim = 0;
+  return shared_cut;
+}
+
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::init_style()
+{
+  // request a neighbor list and configure that request from the
+  // globally selected Kokkos neighbor style (lmp->kokkos->neighflag)
+  neighbor->request(this);
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;  // assumes the new request is the last entry
+
+  neighbor->requests[irequest]->
+    kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
+    !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+  neighbor->requests[irequest]->
+    kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
+
+  if (neighflag == FULL) {
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else if (neighflag == HALF || neighflag == HALFTHREAD) {
+    neighbor->requests[irequest]->full = 0;
+    neighbor->requests[irequest]->half = 1;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else if (neighflag == N2) {
+    neighbor->requests[irequest]->full = 0;
+    neighbor->requests[irequest]->half = 0;
+    neighbor->requests[irequest]->full_cluster = 0;
+  } else if (neighflag == FULLCLUSTER) {
+    neighbor->requests[irequest]->full_cluster = 1;
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+  } else error->all(FLERR,"Cannot use chosen neighbor list style with table/kk");
+}
+
+/*
+template <class DeviceType> template<int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION
+void PairTableKokkos<DeviceType>::
+ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &fpair,
+         const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  const int EFLAG = eflag;
+  const int NEWTON_PAIR = newton_pair;
+  const int VFLAG = vflag_either;
+
+  if (EFLAG) {
+    if (eflag_atom) {
+      E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
+      if (NEWTON_PAIR || i < nlocal) eatom[i] += epairhalf;
+      if (NEWTON_PAIR || j < nlocal) eatom[j] += epairhalf;
+    }
+  }
+
+  if (VFLAG) {
+    const E_FLOAT v0 = delx*delx*fpair;
+    const E_FLOAT v1 = dely*dely*fpair;
+    const E_FLOAT v2 = delz*delz*fpair;
+    const E_FLOAT v3 = delx*dely*fpair;
+    const E_FLOAT v4 = delx*delz*fpair;
+    const E_FLOAT v5 = dely*delz*fpair;
+
+    if (vflag_global) {
+      if (NEIGHFLAG) {
+        if (NEWTON_PAIR) {
+          ev.v[0] += v0;
+          ev.v[1] += v1;
+          ev.v[2] += v2;
+          ev.v[3] += v3;
+          ev.v[4] += v4;
+          ev.v[5] += v5;
+        } else {
+          if (i < nlocal) {
+            ev.v[0] += 0.5*v0;
+            ev.v[1] += 0.5*v1;
+            ev.v[2] += 0.5*v2;
+            ev.v[3] += 0.5*v3;
+            ev.v[4] += 0.5*v4;
+            ev.v[5] += 0.5*v5;
+          }
+          if (j < nlocal) {
+            ev.v[0] += 0.5*v0;
+            ev.v[1] += 0.5*v1;
+            ev.v[2] += 0.5*v2;
+            ev.v[3] += 0.5*v3;
+            ev.v[4] += 0.5*v4;
+            ev.v[5] += 0.5*v5;
+          }
+        }
+      } else {
+        ev.v[0] += 0.5*v0;
+        ev.v[1] += 0.5*v1;
+        ev.v[2] += 0.5*v2;
+        ev.v[3] += 0.5*v3;
+        ev.v[4] += 0.5*v4;
+        ev.v[5] += 0.5*v5;
+      }
+    }
+
+    if (vflag_atom) {
+      if (NEWTON_PAIR || i < nlocal) {
+        d_vatom(i,0) += 0.5*v0;
+        d_vatom(i,1) += 0.5*v1;
+        d_vatom(i,2) += 0.5*v2;
+        d_vatom(i,3) += 0.5*v3;
+        d_vatom(i,4) += 0.5*v4;
+        d_vatom(i,5) += 0.5*v5;
+      }
+      if (NEWTON_PAIR || (NEIGHFLAG && j < nlocal)) {
+        d_vatom(j,0) += 0.5*v0;
+        d_vatom(j,1) += 0.5*v1;
+        d_vatom(j,2) += 0.5*v2;
+        d_vatom(j,3) += 0.5*v3;
+        d_vatom(j,4) += 0.5*v4;
+        d_vatom(j,5) += 0.5*v5;
+      }
+    }
+  }
+}
+*/
+template<class DeviceType>
+void PairTableKokkos<DeviceType>::cleanup_copy()
+{
+  // drop every array handle held by this copy so that destroying the
+  // copy does not deallocate any arrays
+  h_table = NULL; d_table = NULL;
+  eatom = NULL; vatom = NULL;
+  cutsq = NULL; allocated = 0;
+}
+
+template class PairTableKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#if DEVICE==2  // NOTE(review): presumably the build where host and device types differ - confirm
+template class PairTableKokkos<LMPHostType>;  // explicit instantiation for the host type
+#endif
+
diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h
new file mode 100644
index 0000000000..317703c895
--- /dev/null
+++ b/src/KOKKOS/pair_table_kokkos.h
@@ -0,0 +1,352 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(table/kk,PairTableKokkos<LMPDeviceType>)  // plain /kk name maps to the device type
+PairStyle(table/kk/device,PairTableKokkos<LMPDeviceType>)  // same class as table/kk
+PairStyle(table/kk/host,PairTableKokkos<LMPHostType>)  // host-type instantiation
+
+#else
+
+#ifndef LMP_PAIR_TABLE_KOKKOS_H
+#define LMP_PAIR_TABLE_KOKKOS_H
+
+#include "pair.h"
+#include "pair_kokkos.h"
+#include "neigh_list_kokkos.h"
+#include "atom_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class Device,int TABSTYLE>
+struct S_TableCompute {  // tag type; used below as the last template argument of PairComputeFunctor
+  enum {TabStyle = TABSTYLE};  // exposes the table style as a compile-time constant
+};
+
+template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
+class PairTableComputeFunctor;  // forward declaration only; the definition later in this header is commented out
+
+template<class DeviceType>
+class PairTableKokkos : public Pair {  // pair style table/kk, templated on the Kokkos device type
+ public:
+
+  enum {COUL_FLAG=0};  // NOTE(review): presumably tells PairComputeFunctor there is no Coulomb term - confirm
+  typedef DeviceType device_type;
+
+  PairTableKokkos(class LAMMPS *);
+  virtual ~PairTableKokkos();
+
+  virtual void compute(int, int);
+  
+  template<int TABSTYLE> 
+  void compute_style(int, int);
+
+  /*template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR, int TABSTYLE>
+  KOKKOS_FUNCTION
+  EV_FLOAT compute_item(const int& i,
+                        const NeighListKokkos<DeviceType> &list) const;
+*/
+  void settings(int, char **);
+  void coeff(int, char **);
+  double init_one(int, int);
+  void write_restart(FILE *);
+  void read_restart(FILE *);
+  void write_restart_settings(FILE *);
+  void read_restart_settings(FILE *);
+  double single(int, int, int, int, double, double, double, double &);
+  void *extract(const char *, int &);  // returns &tables[0].cut for "cut_coul", NULL for any other name
+
+  void init_style();  // requests a neighbor list matching lmp->kokkos->neighflag
+
+
+ protected:
+  enum{LOOKUP,LINEAR,SPLINE,BITMAP};  // table styles; tabstyle holds one of these
+
+  int tabstyle,tablength;
+  /*struct TableDeviceConst {
+    typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread cutsq;
+    typename ArrayTypes<DeviceType>::t_int_2d_randomread tabindex;
+    typename ArrayTypes<DeviceType>::t_int_1d_randomread nshiftbits,nmask;
+    typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread innersq,invdelta,deltasq6;
+    typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
+  };*/
+  // It's faster not to use texture fetch if the number of tables is less than 32!
+  struct TableDeviceConst {  // same fields as TableDevice, but the 2d table arrays use randomread view types
+    typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
+    typename ArrayTypes<DeviceType>::t_int_2d tabindex;
+    typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
+    typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
+    typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
+  };
+
+  struct TableDevice {  // table data as views of ArrayTypes<DeviceType>
+    typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
+    typename ArrayTypes<DeviceType>::t_int_2d tabindex;
+    typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
+    typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
+    typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
+  };
+
+  struct TableHost {  // the same fields as views of ArrayTypes<LMPHostType>
+    typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
+    typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
+    typename ArrayTypes<LMPHostType>::t_int_1d nshiftbits,nmask;
+    typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
+    typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
+  };
+
+  struct Table {  // one table held in plain arrays; this is what single() and extract() read
+    int ninput,rflag,fpflag,match,ntablebits;
+    int nshiftbits,nmask;
+    double rlo,rhi,fplo,fphi,cut;
+    double *rfile,*efile,*ffile;
+    double *e2file,*f2file;
+    double innersq,delta,invdelta,deltasq6;
+    double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
+  };
+  int ntables;  // number of entries in tables
+  Table *tables;
+  TableDeviceConst d_table_const;
+  TableDevice* d_table;
+  TableHost* h_table;
+
+  int **tabindex;  // tabindex[itype][jtype] selects the entry of tables for a type pair
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];  // NOTE(review): presumably the fixed-size cutsq copy for the STACKPARAMS path - confirm
+
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
+
+  void allocate();
+  void read_table(Table *, char *, char *);
+  void param_extract(Table *, char *);
+  void bcast_table(Table *);
+  void spline_table(Table *);
+  void compute_table(Table *);
+  void null_table(Table *);
+  void free_table(Table *);
+  void spline(double *, double *, int, double, double, double *);
+  double splint(double *, double *, double *, int, double);
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
+  typename ArrayTypes<DeviceType>::t_x_array_const c_x;
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+ protected:
+  int nlocal,nall,eflag,vflag,neighflag,newton_pair;
+  class AtomKokkos *atomKK;
+  int update_table;
+  void create_kokkos_tables();
+  void cleanup_copy();  // zeroes allocated and NULLs cutsq, eatom, vatom, h_table and d_table
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+    return 0;  // this pair style reports no Coulomb energy
+  }
+  // friends: one PairComputeFunctor instantiation per neighbor style, bool flag and table style
+  friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
+
+  friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
+
+  friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
+
+  friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
+/*template<int FULL_NEIGH>
+  KOKKOS_INLINE_FUNCTION
+    void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+                  const F_FLOAT &fpair, const F_FLOAT &delx,
+                  const F_FLOAT &dely, const F_FLOAT &delz) const;
+*/
+};
+/*
+template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
+struct PairTableComputeFunctor  {
+  typedef DeviceType device_type ;
+  typedef EV_FLOAT value_type;
+
+  PairTableKokkos<DeviceType> c;
+  NeighListKokkos<DeviceType> list;
+
+  PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
+                          NeighListKokkos<DeviceType>* list_ptr):
+  c(*c_ptr),list(*list_ptr) {};
+  ~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i) const {
+    if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
+    else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int i, value_type &energy_virial) const {
+    if (c.newton_pair)
+      energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
+    else
+      energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  static void init(volatile value_type &update) {
+    update.evdwl = 0;
+    update.ecoul = 0;
+    update.v[0] = 0;
+    update.v[1] = 0;
+    update.v[2] = 0;
+    update.v[3] = 0;
+    update.v[4] = 0;
+    update.v[5] = 0;
+  }
+  KOKKOS_INLINE_FUNCTION
+  static void join(volatile value_type &update,
+                   const volatile value_type &source) {
+    update.evdwl += source.evdwl;
+    update.ecoul += source.ecoul;
+    update.v[0] += source.v[0];
+    update.v[1] += source.v[1];
+    update.v[2] += source.v[2];
+    update.v[3] += source.v[3];
+    update.v[4] += source.v[4];
+    update.v[5] += source.v[5];
+  }
+};
+
+*/
+
+
+
+
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Pair distance < table inner cutoff
+
+Two atoms are closer together than the pairwise table allows.
+
+E: Pair distance > table outer cutoff
+
+Two atoms are further apart than the pairwise table allows.
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Unknown table style in pair_style command
+
+Style of table is invalid for use with pair_style table command.
+
+E: Illegal number of pair table entries
+
+There must be at least 2 table entries.
+
+E: Invalid pair table length
+
+Length of read-in pair table is invalid.
+
+E: Invalid pair table cutoff
+
+Cutoffs in pair_coeff command are not valid with read-in pair table.
+
+E: Bitmapped table in file does not match requested table
+
+Setting for bitmapped table in pair_coeff command must match table
+in file exactly.
+
+E: All pair coeffs are not set
+
+All pair coefficients must be set in the data file or by the
+pair_coeff command before running a simulation.
+
+E: Cannot open file %s
+
+The specified file cannot be opened.  Check that the path and name are
+correct. If the file is a compressed file, also check that the gzip
+executable can be found and run.
+
+E: Did not find keyword in table file
+
+Keyword used in pair_coeff command was not found in table file.
+
+E: Bitmapped table is incorrect length in table file
+
+Number of table entries is not a correct power of 2.
+
+E: Invalid keyword in pair table parameters
+
+Keyword used in list of table parameters is not recognized.
+
+E: Pair table parameters did not set N
+
+List of pair table parameters must include N setting.
+
+E: Pair table cutoffs must all be equal to use with KSpace
+
+When using pair style table with a long-range KSpace solver, the
+cutoffs for all atom type pairs must all be the same, since the
+long-range solver starts at that cutoff.
+
+*/
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
new file mode 100644
index 0000000000..2883cb06e3
--- /dev/null
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -0,0 +1,443 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "verlet_kokkos.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "atom.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "update.h"
+#include "modify.h"
+#include "compute.h"
+#include "fix.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+
+#include <ctime>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg)
+  : Verlet(lmp, narg, arg)
+{
+  atomKK = static_cast<AtomKokkos *>(atom);  // assumes atom really is an AtomKokkos
+}
+
+/* ----------------------------------------------------------------------
+   setup before run
+------------------------------------------------------------------------- */
+
+void VerletKokkos::setup()  // full setup: ghosts, neighbor lists, initial forces, fix and output setup
+{
+  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");  // printed by rank 0 only
+  update->setupflag = 1;  // reset to 0 on the last line of this function
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  atomKK->modified(Host,ALL_MASK);  // NOTE(review): presumably marks all host atom data as newest - confirm in AtomKokkos
+
+  atomKK->setup();
+  modify->setup_pre_exchange();
+      // debug
+      atomKK->sync(Host,ALL_MASK);
+      atomKK->modified(Host,ALL_MASK);
+  if (triclinic) domain->x2lamda(atomKK->nlocal);  // local atoms only; undone by lamda2x below
+  domain->pbc();
+
+  atomKK->sync(Host,ALL_MASK);
+
+  domain->reset_box();
+  comm->setup();
+  if (neighbor->style) neighbor->setup_bins();
+  comm->exchange();
+  if (atomKK->sortfreq > 0) atomKK->sort();  // sort only when a sort frequency is set
+  comm->borders();
+  if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);  // local and ghost atoms
+
+  atomKK->sync(Host,ALL_MASK);
+
+  domain->image_check();
+  domain->box_too_small_check();
+  modify->setup_pre_neighbor();
+
+  atomKK->modified(Host,ALL_MASK);
+
+  neighbor->build();
+  neighbor->ncalls = 0;  // counter is reset right after this build
+
+  // compute all forces
+
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);  // pair style exists but pair_compute_flag is off
+
+  if (atomKK->molecular) {  // bonded terms only when atomKK->molecular is set
+    if (force->bond) force->bond->compute(eflag,vflag);
+    if (force->angle) force->angle->compute(eflag,vflag);
+    if (force->dihedral) force->dihedral->compute(eflag,vflag);
+    if (force->improper) force->improper->compute(eflag,vflag);
+  }
+
+  if (force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
+    else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  if (force->newton) comm->reverse_comm();  // reverse communication only when force->newton is set
+
+  modify->setup(vflag);
+  output->setup();
+  update->setupflag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   setup without output
+   flag = 0 = just force calculation
+   flag = 1 = reneighbor and force calculation
+------------------------------------------------------------------------- */
+
+void VerletKokkos::setup_minimal(int flag)  // setup without output; flag != 0 also re-neighbors
+{
+  update->setupflag = 1;  // reset to 0 on the last line of this function
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  if (flag) {
+    atomKK->modified(Host,ALL_MASK);  // NOTE(review): presumably marks all host atom data as newest - confirm in AtomKokkos
+
+    modify->setup_pre_exchange();
+      // debug
+      atomKK->sync(Host,ALL_MASK);
+      atomKK->modified(Host,ALL_MASK);
+
+    if (triclinic) domain->x2lamda(atomKK->nlocal);  // local atoms only; undone by lamda2x below
+    domain->pbc();
+
+    atomKK->sync(Host,ALL_MASK);
+
+    domain->reset_box();
+    comm->setup();
+    if (neighbor->style) neighbor->setup_bins();
+    comm->exchange();
+    comm->borders();
+    if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);  // local and ghost atoms
+
+    atomKK->sync(Host,ALL_MASK);
+
+    domain->image_check();
+    domain->box_too_small_check();
+    modify->setup_pre_neighbor();
+
+    atomKK->modified(Host,ALL_MASK);
+
+    neighbor->build();
+    neighbor->ncalls = 0;  // counter is reset right after this build
+  }
+
+  // compute all forces
+
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);  // pair style exists but pair_compute_flag is off
+
+  if (atomKK->molecular) {  // bonded terms only when atomKK->molecular is set
+    if (force->bond) force->bond->compute(eflag,vflag);
+    if (force->angle) force->angle->compute(eflag,vflag);
+    if (force->dihedral) force->dihedral->compute(eflag,vflag);
+    if (force->improper) force->improper->compute(eflag,vflag);
+  }
+
+  if (force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
+    else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  if (force->newton) comm->reverse_comm();  // reverse communication only when force->newton is set
+
+  modify->setup(vflag);
+  update->setupflag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   run for N steps
+------------------------------------------------------------------------- */
+
+void VerletKokkos::run(int n)  // advances update->ntimestep n times, one full step per iteration
+{
+  bigint ntimestep;
+  int nflag,sortflag;
+
+  int n_post_integrate = modify->n_post_integrate;  // cached counts decide which fix hooks are called
+  int n_pre_exchange = modify->n_pre_exchange;
+  int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_pre_force = modify->n_pre_force;
+  int n_post_force = modify->n_post_force;
+  int n_end_of_step = modify->n_end_of_step;
+
+  if (atomKK->sortfreq > 0) sortflag = 1;
+  else sortflag = 0;
+
+  static double time = 0.0;  // accumulates ktimer readings below; never read in this function
+  static int count = 0;  // not used anywhere in this function
+  atomKK->sync(Device,ALL_MASK);  // NOTE(review): presumably brings device copies up to date before the loop - confirm
+  Kokkos::Impl::Timer ktimer;
+
+  for (int i = 0; i < n; i++) {
+
+    ntimestep = ++update->ntimestep;
+    ev_set(ntimestep);
+
+    // initial time integration
+
+    ktimer.reset();
+    modify->initial_integrate(vflag);
+    time += ktimer.seconds();
+    if (n_post_integrate) modify->post_integrate();
+
+    // regular communication vs neighbor list rebuild
+
+    nflag = neighbor->decide();  // 0 = forward communication only, otherwise rebuild neighbor lists
+
+    if (nflag == 0) {
+      timer->stamp();
+      comm->forward_comm();
+      timer->stamp(TIME_COMM);
+    } else {
+      // added debug
+      //atomKK->sync(Host,ALL_MASK);
+      //atomKK->modified(Host,ALL_MASK);
+
+      if (n_pre_exchange) modify->pre_exchange();
+      // debug
+      //atomKK->sync(Host,ALL_MASK);
+      //atomKK->modified(Host,ALL_MASK);
+      if (triclinic) domain->x2lamda(atomKK->nlocal);  // local atoms only; undone by lamda2x below
+      domain->pbc();
+      if (domain->box_change) {  // box, comm and bins are reset only when the box changes
+        domain->reset_box();
+        comm->setup();
+        if (neighbor->style) neighbor->setup_bins();
+      }
+      timer->stamp();
+
+      // added debug
+      //atomKK->sync(Device,ALL_MASK);
+      //atomKK->modified(Device,ALL_MASK);
+
+      comm->exchange();
+      if (sortflag && ntimestep >= atomKK->nextsort) atomKK->sort();  // sort once nextsort is reached
+      comm->borders();
+
+      // added debug
+      //atomKK->sync(Host,ALL_MASK);
+      //atomKK->modified(Host,ALL_MASK);
+
+      if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);  // local and ghost atoms
+
+      timer->stamp(TIME_COMM);
+      if (n_pre_neighbor) modify->pre_neighbor();
+      neighbor->build();
+      timer->stamp(TIME_NEIGHBOR);
+    }
+
+    // force computations
+    // important for pair to come before bonded contributions
+    // since some bonded potentials tally pairwise energy/virial
+    // and Pair:ev_tally() needs to be called before any tallying
+
+    force_clear();
+    // added for debug
+    //atomKK->k_x.sync<LMPHostType>();
+    //atomKK->k_f.sync<LMPHostType>();
+    //atomKK->k_f.modify<LMPHostType>();
+    if (n_pre_force) modify->pre_force(vflag);
+
+    timer->stamp();
+
+    if (pair_compute_flag) {
+      force->pair->compute(eflag,vflag);
+      timer->stamp(TIME_PAIR);
+    }
+
+    if (atomKK->molecular) {  // bonded terms only when atomKK->molecular is set
+      if (force->bond) force->bond->compute(eflag,vflag);
+      if (force->angle) force->angle->compute(eflag,vflag);
+      if (force->dihedral) force->dihedral->compute(eflag,vflag);
+      if (force->improper) force->improper->compute(eflag,vflag);
+      timer->stamp(TIME_BOND);
+    }
+
+    if (kspace_compute_flag) {
+      force->kspace->compute(eflag,vflag);
+      timer->stamp(TIME_KSPACE);
+    }
+
+    // reverse communication of forces
+
+    if (force->newton) {
+      atomKK->sync(Host,F_MASK);  // forces are synced to the host before reverse_comm ...
+      comm->reverse_comm();
+      atomKK->modified(Host,F_MASK);  // ... and flagged as modified on the host afterwards
+      timer->stamp(TIME_COMM);
+    }
+
+    // force modifications, final time integration, diagnostics
+
+    ktimer.reset();
+
+    if (n_post_force) modify->post_force(vflag);
+    modify->final_integrate();
+    if (n_end_of_step) modify->end_of_step();
+
+    time += ktimer.seconds();
+
+    // all output
+
+    if (ntimestep == output->next) {
+       atomKK->sync(Host,ALL_MASK);  // all atom data is synced to the host before writing output
+
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(TIME_OUTPUT);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   clear force on own & ghost atoms
+   clear other arrays as needed
+------------------------------------------------------------------------- */
+
+void VerletKokkos::force_clear()  // zeroes the force view and, when flagged, torque/erforce/de/drho
+{
+  int i;
+
+  if (external_force_clear) return;  // nothing is cleared here when this flag is set
+
+  // clear force on all particles
+  // if either newton flag is set, also include ghosts
+  // when using threads always clear all forces.
+
+  if (neighbor->includegroup == 0) {
+    int nall;
+    if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
+    else nall = atomKK->nlocal;
+
+    size_t nbytes = sizeof(double) * nall;  // bytes of one double per atom; used for the memset calls below
+
+    if (nbytes) {
+      if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {  // clear the side with the larger modified counter
+    	memset_kokkos(atomKK->k_f.view<LMPHostType>());
+    	atomKK->modified(Host,F_MASK);
+      } else {
+        memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
+        atomKK->modified(Device,F_MASK);
+      }
+      if (torqueflag)  memset(&(atomKK->torque[0][0]),0,3*nbytes);
+      if (erforceflag) memset(&(atomKK->erforce[0]),  0,  nbytes);
+      if (e_flag)      memset(&(atomKK->de[0]),       0,  nbytes);
+      if (rho_flag)    memset(&(atomKK->drho[0]),     0,  nbytes);
+    }
+
+  // neighbor includegroup flag is set
+  // clear force only on initial nfirst particles
+  // if either newton flag is set, also include ghosts
+
+  } else {
+    int nall = atomKK->nfirst;
+    if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {  // NOTE(review): the whole force view is passed, not just nfirst atoms - confirm intended
+      memset_kokkos(atomKK->k_f.view<LMPHostType>());
+      atomKK->modified(Host,F_MASK);
+    } else {
+      memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
+      atomKK->modified(Device,F_MASK);
+    }
+    if (torqueflag) {
+      double **torque = atomKK->torque;
+      for (i = 0; i < nall; i++) {
+        torque[i][0] = 0.0;
+        torque[i][1] = 0.0;
+        torque[i][2] = 0.0;
+      }
+    }
+
+    if (erforceflag) {
+      double *erforce = atomKK->erforce;
+      for (i = 0; i < nall; i++) erforce[i] = 0.0;
+    }
+
+    if (e_flag) {
+      double *de = atomKK->de;
+      for (i = 0; i < nall; i++) de[i] = 0.0;
+    }
+
+    if (rho_flag) {
+      double *drho = atomKK->drho;
+      for (i = 0; i < nall; i++) drho[i] = 0.0;
+    }
+
+    if (force->newton) {
+      nall = atomKK->nlocal + atomKK->nghost;  // from here on nall covers local plus ghost atoms
+
+      if (torqueflag) {
+        double **torque = atomKK->torque;
+        for (i = atomKK->nlocal; i < nall; i++) {
+          torque[i][0] = 0.0;
+          torque[i][1] = 0.0;
+          torque[i][2] = 0.0;
+        }
+      }
+
+      if (erforceflag) {
+        double *erforce = atomKK->erforce;
+        for (i = atomKK->nlocal; i < nall; i++) erforce[i] = 0.0;
+      }
+
+      if (e_flag) {
+        double *de = atomKK->de;
+        for (i = 0; i < nall; i++) de[i] = 0.0;  // NOTE(review): starts at 0, unlike the torque/erforce loops above - confirm
+      }
+
+      if (rho_flag) {
+        double *drho = atomKK->drho;
+        for (i = 0; i < nall; i++) drho[i] = 0.0;  // NOTE(review): starts at 0, unlike the torque/erforce loops above - confirm
+      }
+    }
+  }
+}
diff --git a/src/KOKKOS/verlet_kokkos.h b/src/KOKKOS/verlet_kokkos.h
new file mode 100644
index 0000000000..63531bda2d
--- /dev/null
+++ b/src/KOKKOS/verlet_kokkos.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef INTEGRATE_CLASS
+
+IntegrateStyle(verlet/kk,VerletKokkos)
+
+#else
+
+#ifndef LMP_VERLET_KOKKOS_H
+#define LMP_VERLET_KOKKOS_H
+
+#include "verlet.h"
+
+namespace LAMMPS_NS {
+
+class VerletKokkos : public Verlet {  // Verlet subclass registered above as integrate style "verlet/kk"
+ public:
+  VerletKokkos(class LAMMPS *, int, char **);
+  ~VerletKokkos() {}  // empty body: this class performs no cleanup of its own
+  void setup();  // NOTE(review): presumably replaces Verlet's method of the same name -- confirm in verlet.h
+  void setup_minimal(int);
+  void run(int);  // NOTE(review): argument is presumably the number of steps -- confirm
+
+ protected:
+  class AtomKokkos *atomKK;  // not released by the (empty) destructor above
+
+  void force_clear();  // defined in verlet_kokkos.cpp; zeroes per-atom arrays reached through atomKK
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
diff --git a/src/MAKE/Makefile.cuda b/src/MAKE/Makefile.cuda
new file mode 100755
index 0000000000..61b1738ba8
--- /dev/null
+++ b/src/MAKE/Makefile.cuda
@@ -0,0 +1,111 @@
+# cuda = RedHat Linux box, nvcc for Kokkos, MPICH2, FFTW
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		nvcc
+CCFLAGS =	-g -O3 -arch=sm_20
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		g++
+LINKFLAGS =	-g -O
+LIB = 
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings, OPTIONAL
+# see possible settings in doc/Section_start.html#2_2 (step 4)
+
+LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
+
+# MPI library, REQUIRED
+# see discussion in doc/Section_start.html#2_2 (step 5)
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -DMPICH_SKIP_MPICXX
+MPI_PATH = 
+MPI_LIB =	-lmpich -lmpl -lpthread
+
+# FFT library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 6)
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =    	-DFFT_FFTW
+FFT_PATH = 
+FFT_LIB =	-lfftw
+
+# JPEG and/or PNG library, OPTIONAL
+# see discussion in doc/Section_start.html#2_2 (step 7)
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+
+JPG_INC =       
+JPG_PATH = 	
+JPG_LIB =	-ljpeg
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# no need to edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target
+
+$(EXE):	$(OBJ)  # link all objects with LINK, then run SIZE on the result
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library targets
+
+lib:	$(OBJ)  # archive the objects into EXE using ARCHIVE with ARFLAGS
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)  # shared-library build; note it links with CC, not LINK
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules
+
+%.o:%.cu  # compile each .cu file to an object with CC (set to nvcc above)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.o:%.cpp  # .cpp files use the same CC command line as the .cu rule
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp  # redirect CC output under DEPFLAGS into a .d file per .cpp (read by sinclude below)
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+# Individual dependencies
+
+DEPENDS = $(OBJ:.o=.d)
+sinclude $(DEPENDS)
diff --git a/src/Makefile b/src/Makefile
index 8241135cc2..f8e70a94dc 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,7 +14,7 @@ OBJ = 	$(SRC:.cpp=.o)
 # Package variables
 
 PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
-	  kspace manybody mc meam misc molecule mpiio opt peri poems \
+	  kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
 	  reax replica rigid shock srd voronoi xtc
 
 PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
diff --git a/src/atom_vec.cpp b/src/atom_vec.cpp
index 0fd8043d85..06c61caa2f 100644
--- a/src/atom_vec.cpp
+++ b/src/atom_vec.cpp
@@ -80,8 +80,10 @@ void AtomVec::init()
   deform_groupbit = domain->deform_groupbit;
   h_rate = domain->h_rate;
 
-  if (lmp->cuda != NULL && cudable == false)
+  if (lmp->cuda != NULL && !cudable)
     error->all(FLERR,"USER-CUDA package requires a cuda enabled atom_style");
+  if (lmp->kokkos != NULL && !kokkosable)
+    error->all(FLERR,"KOKKOS package requires a kokkos enabled atom_style");
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp
index da34de08ae..82d85d4bec 100644
--- a/src/compute_property_local.cpp
+++ b/src/compute_property_local.cpp
@@ -334,7 +334,7 @@ void ComputePropertyLocal::compute_local()
 
 int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
 {
-  int i,j,m,n,ii,jj,inum,jnum,itype,jtype;
+  int i,j,m,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
@@ -358,7 +358,7 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
 
   double **cutsq = force->pair->cutsq;
 
-  m = n = 0;
+  m = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
-- 
GitLab