diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index e506fa1ad42c849d0b31b0e564a45842d6aa1cfe..03b3074eb109151a6aa4fc416773c92bf17c0432 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -200,6 +200,7 @@ void CommKokkos::forward_comm_device(int dummy)
         }
         n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                    iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        DeviceType::fence();
         if (n) {
           MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
                    n,MPI_DOUBLE,sendproc[iswap],0,world);
@@ -229,11 +230,13 @@ void CommKokkos::forward_comm_device(int dummy)
                     recvproc[iswap],0,world,&request);
         n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
                                    k_buf_send,pbc_flag[iswap],pbc[iswap]);
+        DeviceType::fence();
         if (n)
           MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
                    MPI_DOUBLE,sendproc[iswap],0,world);
         if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
         avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
+        DeviceType::fence();
       }
 
     } else {
@@ -395,6 +398,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
 
     n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist,
                                        iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]);
+    DeviceType::fence();
 
     // exchange with another proc
     // if self, set recv buffer to send buffer
@@ -411,6 +415,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
     // unpack buffer
 
     pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair);
+    DeviceType::fence();
   }
 }
 
@@ -610,6 +615,7 @@ void CommKokkos::exchange_device()
                                    k_exchange_sendlist,k_exchange_copylist,
                                    ExecutionSpaceFromDevice<DeviceType>::
                                    space,dim,lo,hi);
+      DeviceType::fence();
 
     } else {
       while (i < nlocal) {
@@ -634,6 +640,7 @@ void CommKokkos::exchange_device()
         atom->nlocal=avec->
           unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
                                  ExecutionSpaceFromDevice<DeviceType>::space);
+        DeviceType::fence();
       }
     } else {
       MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
@@ -666,6 +673,7 @@ void CommKokkos::exchange_device()
         atom->nlocal = avec->
           unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
                                  ExecutionSpaceFromDevice<DeviceType>::space);
+        DeviceType::fence();
       }
     }
 
@@ -925,11 +933,14 @@ void CommKokkos::borders_device() {
                    "implemented with Kokkos");
         n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
                                   pbc_flag[iswap],pbc[iswap]);
+        DeviceType::fence();
       }
-      else
+      else {
         n = avec->
           pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
                              pbc_flag[iswap],pbc[iswap],exec_space);
+        DeviceType::fence();
+      }
 
       // swap atoms with other proc
       // no MPI calls except SendRecv if nsend/nrecv = 0
@@ -960,12 +971,15 @@ void CommKokkos::borders_device() {
         avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
       }
       else
-        if (sendproc[iswap] != me)
+        if (sendproc[iswap] != me) {
           avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                      k_buf_recv,exec_space);
-        else
+          DeviceType::fence();
+        } else {
           avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                      k_buf_send,exec_space);
+          DeviceType::fence();
+        }
 
       // set all pointers & counters
 
diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp
index fdfaf296ef335f30654c1aea300db1102dfca772..f107370514001d22c2f054f52b2c98ddd312ded6 100644
--- a/src/KOKKOS/gridcomm_kokkos.cpp
+++ b/src/KOKKOS/gridcomm_kokkos.cpp
@@ -523,6 +523,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
       kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
     else
       kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m);
+    DeviceType::fence();
 
     if (swap[m].sendproc != me) {
       MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR,
@@ -533,6 +534,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
     }
 
     kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
+    DeviceType::fence();
   }
 }
 
@@ -554,6 +556,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
       kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
     else
       kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m);
+    DeviceType::fence();
 
     if (swap[m].recvproc != me) {
       MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR,
@@ -564,6 +567,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
     }
 
     kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
+    DeviceType::fence();
   }
 }