diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp
index ddd82b30b59cde846aac81ad5a39f92e7eca54eb..22ec8dde3bf08b4850e53dcd5607de01595a8329 100644
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -80,9 +80,6 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
 {
   if (lmp->citeme) lmp->citeme->add(cite_gpu_package);
 
-  if (lmp->cuda)
-    error->all(FLERR,"Cannot use GPU package with USER-CUDA package enabled");
-
   if (narg < 4) error->all(FLERR,"Illegal package gpu command");
 
   int ngpu = atoi(arg[3]);
diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp
index cd90ef6e155992a45c971d8705c087b86386298e..e3adb6c268607399590c8bdca87686427ae152bb 100644
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@@ -45,15 +45,8 @@
 
 #include "suffix.h"
 
-#if defined(LMP_USER_CUDA)
-#include "cuda_modify_flags.h"
-#endif
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
-#if defined(LMP_USER_CUDA)
-using namespace FixConstCuda;
-#endif
 
 static int get_tid()
 {
@@ -157,18 +150,6 @@ FixOMP::~FixOMP()
 
 int FixOMP::setmask()
 {
-  // compatibility with USER-CUDA
-  // our fix doesn't need any data transfer.
-#if defined(LMP_USER_CUDA)
-  if (lmp->cuda) {
-    int mask = 0;
-    mask |= PRE_FORCE_CUDA;
-    mask |= PRE_FORCE_RESPA;
-    mask |= MIN_PRE_FORCE;
-    return mask;
-  }
-#endif
-
   int mask = 0;
   mask |= PRE_FORCE;
   mask |= PRE_FORCE_RESPA;
diff --git a/src/atom.cpp b/src/atom.cpp
index e96e5e20506141cebf407f201c27ca914dd8bb96..0793caad2d33074d33f45e0eb8a31583ced7ca72 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -32,7 +32,6 @@
 #include "domain.h"
 #include "group.h"
 #include "molecule.h"
-#include "accelerator_cuda.h"
 #include "atom_masks.h"
 #include "math_const.h"
 #include "memory.h"
@@ -44,7 +43,6 @@ using namespace MathConst;
 #define DELTA 1
 #define DELTA_MEMSTR 1024
 #define EPSILON 1.0e-6
-#define CUDA_CHUNK 3000
 
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
@@ -1716,10 +1714,6 @@ void Atom::sort()
 
   nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
 
-  // download data from GPU if necessary
-
-  if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll();
-
   // re-setup sort bins if needed
 
   if (domain->box_change) setup_sort_bins();
@@ -1795,10 +1789,6 @@ void Atom::sort()
     current[empty] = permute[empty];
   }
 
-  // upload data back to GPU if necessary
-
-  if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->uploadAll();
-
   // sanity check that current = permute
 
   //int flag = 0;
@@ -1817,25 +1807,12 @@ void Atom::setup_sort_bins()
 {
   // binsize:
   // user setting if explicitly set
-  // 1/2 of neighbor cutoff for non-CUDA
-  // CUDA_CHUNK atoms/proc for CUDA
+  // otherwise default = 1/2 of max neighbor cutoff (neighbor->cutneighmax)
   // check if neighbor cutoff = 0.0
 
   double binsize;
   if (userbinsize > 0.0) binsize = userbinsize;
-  else if (!lmp->cuda) binsize = 0.5 * neighbor->cutneighmax;
-  else {
-    if (domain->dimension == 3) {
-      double vol = (domain->boxhi[0]-domain->boxlo[0]) *
-        (domain->boxhi[1]-domain->boxlo[1]) *
-        (domain->boxhi[2]-domain->boxlo[2]);
-      binsize = pow(1.0*CUDA_CHUNK/natoms*vol,1.0/3.0);
-    } else {
-      double area = (domain->boxhi[0]-domain->boxlo[0]) *
-        (domain->boxhi[1]-domain->boxlo[1]);
-      binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0);
-    }
-  }
+  else binsize = 0.5 * neighbor->cutneighmax;
   if (binsize == 0.0) error->all(FLERR,"Atom sorting has bin size = 0.0");
 
   double bininv = 1.0/binsize;
diff --git a/src/atom_vec.cpp b/src/atom_vec.cpp
index c6f54ec19e9931b00d35161eedcdcf5cfe76be1b..a8fa982c002e7f399381c720d4d7c1332d58c66d 100644
--- a/src/atom_vec.cpp
+++ b/src/atom_vec.cpp
@@ -33,7 +33,7 @@ AtomVec::AtomVec(LAMMPS *lmp) : Pointers(lmp)
   mass_type = dipole_type = 0;
   forceclearflag = 0;
   size_data_bonus = 0;
-  cudable = kokkosable = 0;
+  kokkosable = 0;
 
   nargcopy = 0;
   argcopy = NULL;
@@ -81,8 +81,6 @@ void AtomVec::init()
   deform_groupbit = domain->deform_groupbit;
   h_rate = domain->h_rate;
 
-  if (lmp->cuda != NULL && !cudable)
-    error->all(FLERR,"USER-CUDA package requires a cuda enabled atom_style");
   if (lmp->kokkos != NULL && !kokkosable)
     error->all(FLERR,"KOKKOS package requires a kokkos enabled atom_style");
 }
diff --git a/src/atom_vec.h b/src/atom_vec.h
index 66bfd14608a69a9de86bad743ea3000978041a3e..267cc599d10d01de844c6327b6bea222dcb6f59b 100644
--- a/src/atom_vec.h
+++ b/src/atom_vec.h
@@ -43,9 +43,7 @@ class AtomVec : protected Pointers {
   class Molecule **onemols;            // list of molecules for style template
   int nset;                            // # of molecules in list
 
-  int cudable;                         // 1 if atom style is CUDA-enabled
   int kokkosable;                      // 1 if atom style is KOKKOS-enabled
-  int *maxsend;                        // CUDA-specific variable
 
   int nargcopy;          // copy of command-line args for atom_style command
   char **argcopy;        // used when AtomVec is realloced (restart,replicate)
diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp
index 98f406b2938baaf227e5f019d90a804ea9941150..058de915ba1c70e2f991e657e79e1244e2512841 100644
--- a/src/comm_tiled.cpp
+++ b/src/comm_tiled.cpp
@@ -45,8 +45,6 @@ enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
 CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
 {
-  if (lmp->cuda)
-    error->all(FLERR,"USER-CUDA package does not yet support comm_style tiled");
   if (lmp->kokkos)
     error->all(FLERR,"KOKKOS package does not yet support comm_style tiled");
 
@@ -65,8 +63,6 @@ CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
 //
 CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
 {
-  if (lmp->cuda)
-    error->all(FLERR,"USER-CUDA package does not yet support comm_style tiled");
   if (lmp->kokkos)
     error->all(FLERR,"KOKKOS package does not yet support comm_style tiled");
 
diff --git a/src/info.cpp b/src/info.cpp
index 9db3dd01de7534b0e22fee75ef932027e37d3656..016a4c65261e1ed1819d470434d3906dd28e5881 100644
--- a/src/info.cpp
+++ b/src/info.cpp
@@ -17,7 +17,6 @@
 
 #include <string.h>
 #include "info.h"
-#include "accelerator_cuda.h"
 #include "accelerator_kokkos.h"
 #include "atom.h"
 #include "comm.h"
@@ -476,9 +475,7 @@ bool Info::is_active(const char *category, const char *name)
   const int len = strlen(name);
 
   if (strcmp(category,"package") == 0) {
-    if (strcmp(name,"cuda") == 0) {
-      return (lmp->cuda && lmp->cuda->cuda_exists) ? true : false;
-    } else if (strcmp(name,"gpu") == 0) {
+    if (strcmp(name,"gpu") == 0) {
       return (modify->find_fix("package_gpu") >= 0) ? true : false;
     } else if (strcmp(name,"intel") == 0) {
       return (modify->find_fix("package_intel") >= 0) ? true : false;
diff --git a/src/input.cpp b/src/input.cpp
index bcb64effe98ac3b2985ad16b13c9c48fc7fbed29..7983f81532caa13f89bddf30bcd4a5379e31d553 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -46,7 +46,6 @@
 #include "special.h"
 #include "timer.h"
 #include "variable.h"
-#include "accelerator_cuda.h"
 #include "accelerator_kokkos.h"
 #include "error.h"
 #include "memory.h"
@@ -1567,13 +1566,7 @@ void Input::package()
   // same checks for packages existing as in LAMMPS::post_create()
   // since can be invoked here by package command in input script
 
-  if (strcmp(arg[0],"cuda") == 0) {
-    if (lmp->cuda == NULL || lmp->cuda->cuda_exists == 0)
-      error->all(FLERR,
-                 "Package cuda command without USER-CUDA package enabled");
-    lmp->cuda->accelerator(narg-1,&arg[1]);
-
-  } else if (strcmp(arg[0],"gpu") == 0) {
+  if (strcmp(arg[0],"gpu") == 0) {
     if (!modify->check_package("GPU"))
       error->all(FLERR,"Package gpu command without GPU package installed");
 
diff --git a/src/lammps.cpp b/src/lammps.cpp
index af9950073f3e8a0af4b67dfe6cc3387fd2631f22..326a3229cd853d3edfa839cb387a427a7f712a1e 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -581,7 +581,6 @@ LAMMPS::~LAMMPS()
 
   if (world != universe->uworld) MPI_Comm_free(&world);
 
-  delete cuda;
   delete kokkos;
   delete [] suffix;
   delete [] suffix2;
@@ -603,16 +602,13 @@ void LAMMPS::create()
   // Comm class must be created before Atom class
   // so that nthreads is defined when create_avec invokes grow()
 
-  if (cuda) comm = new CommCuda(this);
-  else if (kokkos) comm = new CommKokkos(this);
+  if (kokkos) comm = new CommKokkos(this);
   else comm = new CommBrick(this);
 
-  if (cuda) neighbor = new NeighborCuda(this);
-  else if (kokkos) neighbor = new NeighborKokkos(this);
+  if (kokkos) neighbor = new NeighborKokkos(this);
   else neighbor = new Neighbor(this);
 
-  if (cuda) domain = new DomainCuda(this);
-  else if (kokkos) domain = new DomainKokkos(this);
+  if (kokkos) domain = new DomainKokkos(this);
 #ifdef LMP_USER_OMP
   else domain = new DomainOMP(this);
 #else
@@ -630,8 +626,7 @@ void LAMMPS::create()
   group = new Group(this);
   force = new Force(this);    // must be after group, to create temperature
 
-  if (cuda) modify = new ModifyCuda(this);
-  else if (kokkos) modify = new ModifyKokkos(this);
+  if (kokkos) modify = new ModifyKokkos(this);
   else modify = new Modify(this);
 
   output = new Output(this);  // must be after group, so "all" exists
@@ -651,19 +646,16 @@ void LAMMPS::create()
 
 void LAMMPS::post_create()
 {
-  // default package commands triggered by "-c on" and "-k on"
+  // default package command triggered by "-k on"
 
-  if (cuda && cuda->cuda_exists) input->one("package cuda 1");
   if (kokkos && kokkos->kokkos_exists) input->one("package kokkos");
 
   // suffix will always be set if suffix_enable = 1
-  // check that USER-CUDA and KOKKOS package classes were instantiated
+  // check that KOKKOS package classes were instantiated
   // check that GPU, INTEL, USER-OMP fixes were compiled with LAMMPS
 
   if (!suffix_enable) return;
 
-  if (strcmp(suffix,"cuda") == 0 && (cuda == NULL || cuda->cuda_exists == 0))
-    error->all(FLERR,"Using suffix cuda without USER-CUDA package enabled");
   if (strcmp(suffix,"gpu") == 0 && !modify->check_package("GPU"))
     error->all(FLERR,"Using suffix gpu without GPU package installed");
   if (strcmp(suffix,"intel") == 0 && !modify->check_package("INTEL"))
@@ -771,7 +763,6 @@ void LAMMPS::help()
 {
   fprintf(screen,
           "\nCommand line options:\n\n"
-          "-cuda on/off                : turn CUDA mode on or off (-c)\n"
           "-echo none/screen/log/both  : echoing of input script (-e)\n"
           "-help                       : print this help message (-h)\n"
           "-in filename                : read input from file, not stdin (-i)\n"
@@ -785,7 +776,7 @@ void LAMMPS::help()
           "-restart rfile dfile ...    : convert restart to data file (-r)\n"
           "-reorder topology-specs     : processor reordering (-r)\n"
           "-screen none/filename       : where to send screen output (-sc)\n"
-          "-suffix cuda/gpu/opt/omp    : style suffix to apply (-sf)\n"
+          "-suffix gpu/intel/opt/omp   : style suffix to apply (-sf)\n"
           "-var varname value          : set index style variable (-v)\n\n");
 
   fprintf(screen,"Style options compiled with this executable\n\n");
diff --git a/src/lammps.h b/src/lammps.h
index 88449a4d88c49c325a905e0f3300d05e71411ab3..3978f99111b6ace3d46cc38575b1c75eda932571 100644
--- a/src/lammps.h
+++ b/src/lammps.h
@@ -50,7 +50,6 @@ class LAMMPS {
   int num_package;               // number of cmdline package commands
   int cite_enable;               // 1 if generating log.cite, 0 if disabled
 
-  class Cuda *cuda;              // CUDA accelerator class
   class KokkosLMP *kokkos;       // KOKKOS accelerator class
   class AtomKokkos *atomKK;      // KOKKOS version of Atom class
 
diff --git a/src/output.cpp b/src/output.cpp
index db667e8694baf06a1a61f067d81f2e2da0287a12..073171735fddd83b51e822b8c1706b9b15545fa3 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -30,7 +30,6 @@
 #include "force.h"
 #include "dump.h"
 #include "write_restart.h"
-#include "accelerator_cuda.h"
 #include "memory.h"
 #include "error.h"
 
@@ -290,11 +289,8 @@ void Output::write(bigint ntimestep)
 {
   // next_dump does not force output on last step of run
   // wrap dumps that invoke computes or eval of variable with clear/add
-  // download data from GPU if necessary
 
   if (next_dump_any == ntimestep) {
-    if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll();
-
     for (int idump = 0; idump < ndump; idump++) {
       if (next_dump[idump] == ntimestep) {
         if (dump[idump]->clearstep || every_dump[idump] == 0)
@@ -321,12 +317,9 @@ void Output::write(bigint ntimestep)
 
   // next_restart does not force output on last step of run
   // for toggle = 0, replace "*" with current timestep in restart filename
-  // download data from GPU if necessary
   // eval of variable may invoke computes so wrap with clear/add
 
   if (next_restart == ntimestep) {
-    if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll();
-
     if (next_restart_single == ntimestep) {
       char *file = new char[strlen(restart1) + 16];
       char *ptr = strchr(restart1,'*');
diff --git a/src/pair.cpp b/src/pair.cpp
index c169177ea906822de92b89d7c9edc2b1b94e26f9..a5e6d134514ab5a46787c81864e93b8d0b647a6b 100644
--- a/src/pair.cpp
+++ b/src/pair.cpp
@@ -34,7 +34,6 @@
 #include "update.h"
 #include "modify.h"
 #include "compute.h"
-#include "accelerator_cuda.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
@@ -99,7 +98,7 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
   num_tally_compute = 0;
   list_tally_compute = NULL;
 
-  // CUDA and KOKKOS per-fix data masks
+  // KOKKOS per-fix data masks
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
@@ -810,8 +809,6 @@ void Pair::ev_setup(int eflag, int vflag)
     if (vflag_atom == 0) vflag_either = 0;
     if (vflag_either == 0 && eflag_either == 0) evflag = 0;
   } else vflag_fdotr = 0;
-
-  if (lmp->cuda) lmp->cuda->evsetup_eatom_vatom(eflag_atom,vflag_atom);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/update.cpp b/src/update.cpp
index f53acbed3ac37804582f22d156abe9991a77f43f..8bb5bca4874bc355e06ee1c6197fc58886fd974b 100644
--- a/src/update.cpp
+++ b/src/update.cpp
@@ -85,16 +85,6 @@ Update::~Update()
 
 void Update::init()
 {
-  // if USER-CUDA mode is enabled:
-  // integrate/minimize style must be CUDA variant
-
-  if (whichflag == 1 && lmp->cuda)
-    if (strstr(integrate_style,"cuda") == NULL)
-      error->all(FLERR,"USER-CUDA mode requires CUDA variant of run style");
-  if (whichflag == 2 && lmp->cuda)
-    if (strstr(minimize_style,"cuda") == NULL)
-      error->all(FLERR,"USER-CUDA mode requires CUDA variant of min style");
-
   // init the appropriate integrate and/or minimize class
   // if neither (e.g. from write_restart) then just return