diff --git a/doc/src/package.txt b/doc/src/package.txt
index 55bd111b50ad4b37100af9704cb18458b6a71d2a..3d25a64d3174aacc9218ed4d26e96cae5355ddef 100644
--- a/doc/src/package.txt
+++ b/doc/src/package.txt
@@ -512,9 +512,12 @@ this keyword is set to {on}, buffers in GPU memory are passed directly
 through MPI send/receive calls. This reduces overhead of first copying
 the data to the host CPU. However GPU-direct is not supported on all
 systems, which can lead to segmentation faults and would require
-using a value of {off}. When the {gpu/direct} keyword is set to {off}
-while any of the {comm} keywords are set to {device}, the value for the
-{comm} keywords will be automatically changed to {host}.
+using a value of {off}. If LAMMPS can safely detect that GPU-direct is
+not available (currently only possible with OpenMPI v2.0.0 or later),
+then the {gpu/direct} keyword is automatically set to {off} by default.
+When the {gpu/direct} keyword is set to {off} while any of the {comm}
+keywords are set to {device}, the value for these {comm} keywords will
+be automatically changed to {host}.
 
 :line
 
@@ -624,6 +627,8 @@ switch"_Section_start.html#start_6.
 
 For the KOKKOS package, the option defaults neigh = full, neigh/qeq =
 full, newton = off, binsize = 0.0, and comm = device, gpu/direct = on.
+When LAMMPS can safely detect, that GPU-direct is not available, the
+default value of gpu/direct becomes "off".
 These settings are made automatically by the required "-k on"
 "command-line switch"_Section_start.html#start_6. You can change them by
 using the package kokkos command in your input script or via the "-pk
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index 6190d71b259235078566e69b08bfc9e384b4d4bb..21840b7c3ee8fd396b90e9983cd57c2d49ef64aa 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -406,7 +406,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
     if (sendproc[iswap] != me) {
       double* buf_send_pair;
       double* buf_recv_pair;
-      if (lmp->kokkos->gpu_direct) {
+      if (lmp->kokkos->gpu_direct_flag) {
         buf_send_pair = k_buf_send_pair.view<DeviceType>().data();
         buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data();
       } else {
@@ -424,7 +424,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
         MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world);
       if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
 
-      if (!lmp->kokkos->gpu_direct) {
+      if (!lmp->kokkos->gpu_direct_flag) {
         k_buf_recv_pair.modify<LMPHostType>();
         k_buf_recv_pair.sync<DeviceType>();
       }
diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp
index 44795b569de1b0f0babfa6d9d769c7c8fec68a12..64a9d6992f0f56a64e5cbbb3d6fb674929f5ca41 100644
--- a/src/KOKKOS/gridcomm_kokkos.cpp
+++ b/src/KOKKOS/gridcomm_kokkos.cpp
@@ -529,7 +529,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
     if (swap[m].sendproc != me) {
       FFT_SCALAR* buf1;
       FFT_SCALAR* buf2;
-      if (lmp->kokkos->gpu_direct) {
+      if (lmp->kokkos->gpu_direct_flag) {
         buf1 = k_buf1.view<DeviceType>().data();
         buf2 = k_buf2.view<DeviceType>().data();
       } else {
@@ -545,7 +545,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
                swap[m].sendproc,0,gridcomm);
       MPI_Wait(&request,MPI_STATUS_IGNORE);
 
-      if (!lmp->kokkos->gpu_direct) {
+      if (!lmp->kokkos->gpu_direct_flag) {
         k_buf2.modify<LMPHostType>();
         k_buf2.sync<DeviceType>();
       }
@@ -579,7 +579,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
     if (swap[m].recvproc != me) {
       FFT_SCALAR* buf1;
       FFT_SCALAR* buf2;
-      if (lmp->kokkos->gpu_direct) {
+      if (lmp->kokkos->gpu_direct_flag) {
         buf1 = k_buf1.view<DeviceType>().data();
         buf2 = k_buf2.view<DeviceType>().data();
       } else {
@@ -595,7 +595,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
                swap[m].recvproc,0,gridcomm);
       MPI_Wait(&request,MPI_STATUS_IGNORE);
 
-      if (!lmp->kokkos->gpu_direct) {
+      if (!lmp->kokkos->gpu_direct_flag) {
         k_buf2.modify<LMPHostType>();
         k_buf2.sync<DeviceType>();
       }
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index 050b1420d32ffb34fd2b774166518d67ec07111c..fb6b8d8d45aa5d4e1e582c9b745ec53906aa999b 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -158,14 +158,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
                      " but cannot determine if this is the case\n         try"
                      " '-pk kokkos gpu/direct off' when getting segmentation faults");
     } else if ( 0 == have_gpu_direct() ) {
-      error->warning(FLERR,"GPU-direct is NOT available, but some parts of "
-                     "Kokkos with CUDA require it by default\n         try"
-                     " '-pk kokkos gpu/direct off' when getting segmentation faults");
+      error->warning(FLERR,"GPU-direct is NOT available, "
+                     "using '-pk kokkos gpu/direct off' by default");
     } else {
       ; // should never get here
     }
   }
-
 #endif
 
   Kokkos::InitArguments args;
@@ -186,7 +184,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   exchange_comm_on_host = 0;
   forward_comm_on_host = 0;
   reverse_comm_on_host = 0;
-  gpu_direct = 1;
+  gpu_direct_flag = 1;
+
+#if KOKKOS_USE_CUDA
+  // only if we can safely detect, that GPU-direct is not available, change default
+  if (0 == have_gpu_direct()) gpu_direct_flag = 0;
+#endif
 
 #ifdef KILL_KOKKOS_ON_SIGSEGV
   signal(SIGSEGV, my_signal_handler);
@@ -217,7 +220,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
   double binsize = 0.0;
   exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
   exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
-  gpu_direct = 1;
+  gpu_direct_flag = 1;
 
   int iarg = 0;
   while (iarg < narg) {
@@ -303,8 +306,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
       iarg += 2;
     } else if (strcmp(arg[iarg],"gpu/direct") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
-      if (strcmp(arg[iarg+1],"off") == 0) gpu_direct = 0;
-      else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct = 1;
+      if (strcmp(arg[iarg+1],"off") == 0) gpu_direct_flag = 0;
+      else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1;
       else error->all(FLERR,"Illegal package kokkos command");
       iarg += 2;
     } else error->all(FLERR,"Illegal package kokkos command");
@@ -312,7 +315,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
 
   // if "gpu/direct off" and "comm device", change to "comm host"
 
-  if (!gpu_direct) {
+  if (!gpu_direct_flag) {
    if (exchange_comm_classic == 0 && exchange_comm_on_host == 0)
      exchange_comm_on_host = 1;
    if (forward_comm_classic == 0 && forward_comm_on_host == 0)
diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h
index c3c5d0d6e1f142c4b426b11f2b8f47e002c455c4..cf209c0adb97e10a7bf5b45daa4502b3ad15d46a 100644
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@@ -34,7 +34,7 @@ class KokkosLMP : protected Pointers {
   int num_threads,ngpu;
   int numa;
   int auto_sync;
-  int gpu_direct;
+  int gpu_direct_flag;
 
   KokkosLMP(class LAMMPS *, int, char **);
   ~KokkosLMP();