diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index e506fa1ad42c849d0b31b0e564a45842d6aa1cfe..03b3074eb109151a6aa4fc416773c92bf17c0432 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -200,6 +200,7 @@ void CommKokkos::forward_comm_device(int dummy) } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); if (n) { MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(), n,MPI_DOUBLE,sendproc[iswap],0,world); @@ -229,11 +230,13 @@ void CommKokkos::forward_comm_device(int dummy) recvproc[iswap],0,world,&request); n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n, MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); } } else { @@ -395,6 +398,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); // exchange with another proc // if self, set recv buffer to send buffer @@ -411,6 +415,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // unpack buffer pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair); + DeviceType::fence(); } } @@ -610,6 +615,7 @@ void CommKokkos::exchange_device() k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice<DeviceType>:: space,dim,lo,hi); + DeviceType::fence(); } else { while (i < nlocal) { @@ -634,6 +640,7 @@ void CommKokkos::exchange_device() atom->nlocal=avec-> unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice<DeviceType>::space); + DeviceType::fence(); } } else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, @@ -666,6 +673,7 @@ void CommKokkos::exchange_device() atom->nlocal = avec-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice<DeviceType>::space); + DeviceType::fence(); } } @@ -925,11 +933,14 @@ void CommKokkos::borders_device() { "implemented with Kokkos"); n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send, pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); } - else + else { n = avec-> pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap, pbc_flag[iswap],pbc[iswap],exec_space); + DeviceType::fence(); + } // swap atoms with other proc // no MPI calls except SendRecv if nsend/nrecv = 0 @@ -960,12 +971,15 @@ void CommKokkos::borders_device() { avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); } else - if (sendproc[iswap] != me) + if (sendproc[iswap] != me) { avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); - else + DeviceType::fence(); + } else { avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_send,exec_space); + DeviceType::fence(); + } // set all pointers & counters diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index fdfaf296ef335f30654c1aea300db1102dfca772..f107370514001d22c2f054f52b2c98ddd312ded6 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -523,6 +523,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); else kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m); + DeviceType::fence(); if (swap[m].sendproc != me) { MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR, @@ -533,6 +534,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) } kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); + DeviceType::fence(); } } @@ -554,6 +556,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); else kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m); + DeviceType::fence(); if (swap[m].recvproc != me) { MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR, @@ -564,6 +567,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) } kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); + DeviceType::fence(); } }