diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
index f2c04bec1b6281b3922513628b2a655e44c9baf7..555bb2e7478e0b11ae64e58f2208842a2124215b 100644
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -305,6 +305,299 @@ void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
   }
 }
 
+
+/* ----------------------------------------------------------------------
+   functor to pack positions and velocities for forward communication
+   row i of the 6-column buffer receives x,v of atom j = list(iswap,i)
+   template flags select the variant at compile time:
+     PBC_FLAG       non-zero: add the periodic shift built from pbc[] to x
+     TRICLINIC      non-zero: that shift also includes the xy,xz,yz terms
+     DEFORM_VREMAP  non-zero: atoms whose mask matches deform_vremap also
+                    get a shift built from pbc[] and h_rate[] added to v
+------------------------------------------------------------------------- */
+
+template<class DeviceType,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP>
+struct AtomVecKokkos_PackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
+  typename ArrayTypes<DeviceType>::t_int_1d _mask;
+  typename ArrayTypes<DeviceType>::t_v_array _v;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
+  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
+  const int _iswap;
+  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
+  X_FLOAT _pbc[6];
+  X_FLOAT _h_rate[6];
+  const int _deform_vremap;
+
+  AtomVecKokkos_PackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_int_1d &mask,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const typename DAT::tdual_int_2d &list,
+    const int &iswap,
+    const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
+    const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc,
+    const double * const h_rate,
+    const int &deform_vremap):
+    _x(x.view<DeviceType>()),
+    _mask(mask.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _list(list.view<DeviceType>()),_iswap(iswap),
+    _xprd(xprd),_yprd(yprd),_zprd(zprd),
+    _xy(xy),_xz(xz),_yz(yz),
+    _deform_vremap(deform_vremap)
+  {
+    // view the storage of buf as an unmanaged (maxsend x 6) array
+    const size_t elements = 6;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements);
+
+    // copy pbc[] and h_rate[] into the functor's own X_FLOAT arrays
+    for (int k = 0; k < 6; k++) {
+      _pbc[k] = pbc[k];
+      _h_rate[k] = h_rate[k];
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    const int j = _list(_iswap,i);
+
+    // positions
+    if (PBC_FLAG == 0) {
+      _buf(i,0) = _x(j,0);
+      _buf(i,1) = _x(j,1);
+      _buf(i,2) = _x(j,2);
+    } else if (TRICLINIC == 0) {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    } else {
+      _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
+      _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
+      _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
+    }
+
+    // velocities: the h_rate shift is only added on the PBC path and only if
+    // the mask shares a bit with _deform_vremap; the mask is read for atom j,
+    // the atom being packed, not for the buffer slot i
+    if (PBC_FLAG != 0 && DEFORM_VREMAP != 0 && (_mask(j) & _deform_vremap)) {
+      _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4];
+      _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3];
+      _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2];
+    } else {
+      _buf(i,3) = _v(j,0);
+      _buf(i,4) = _v(j,1);
+      _buf(i,5) = _v(j,2);
+    }
+  }
+};
+
+/* ----------------------------------------------------------------------
+   pack x and v of the n atoms listed in row iswap of list into buf
+   uses the LMPHostType functor if commKK->forward_comm_on_host is set,
+   the LMPDeviceType functor otherwise
+   functor flags are taken from pbc_flag, domain->triclinic, deform_vremap
+   returns the number of values packed, 6 per atom
+------------------------------------------------------------------------- */
+
+int AtomVecKokkos::pack_comm_vel_kokkos(
+  const int &n,
+  const DAT::tdual_int_2d &list,
+  const int & iswap,
+  const DAT::tdual_xfloat_2d &buf,
+  const int &pbc_flag,
+  const int* const pbc)
+{
+  // NOTE(review): the functor reads mask when deform_vremap is set, but only
+  // x and v are synced below - confirm mask is current on the execution
+  // space that is used
+  if (commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|V_MASK);
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if (domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    } else {
+      if (domain->triclinic) {
+        struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackCommVel<LMPHostType,0,0,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  } else {
+    sync(Device,X_MASK|V_MASK);
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,1> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      } else {
+        if (domain->triclinic) {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        } else {
+          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,0> f(
+            atomKK->k_x,atomKK->k_mask,
+            atomKK->k_v,
+            buf,list,iswap,
+            domain->xprd,domain->yprd,domain->zprd,
+            domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+          Kokkos::parallel_for(n,f);
+        }
+      }
+    } else {
+      if (domain->triclinic) {
+        struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      } else {
+        struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,0,0> f(
+          atomKK->k_x,atomKK->k_mask,
+          atomKK->k_v,
+          buf,list,iswap,
+          domain->xprd,domain->yprd,domain->zprd,
+          domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
+        Kokkos::parallel_for(n,f);
+      }
+    }
+  }
+  return n*6;
+}
+
+/* ----------------------------------------------------------------------
+   functor to unpack positions and velocities from forward communication
+   row i of the 6-column buffer is written to x,v of atom first+i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+struct AtomVecKokkos_UnpackCommVel {
+  typedef DeviceType device_type;
+
+  typename ArrayTypes<DeviceType>::t_x_array _x;
+  typename ArrayTypes<DeviceType>::t_v_array _v;
+  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
+  int _first;
+
+  AtomVecKokkos_UnpackCommVel(
+    const typename DAT::tdual_x_array &x,
+    const typename DAT::tdual_v_array &v,
+    const typename DAT::tdual_xfloat_2d &buf,
+    const int& first):
+    _x(x.view<DeviceType>()),
+    _v(v.view<DeviceType>()),
+    _first(first)
+  {
+    // buffer_view is defined elsewhere; presumably it sets _buf to a (maxsend x 6) view of buf
+    const size_t elements = 6;
+    const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
+    buffer_view<DeviceType>(_buf,buf,maxsend,elements);
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& i) const {
+    _x(i+_first,0) = _buf(i,0);
+    _x(i+_first,1) = _buf(i,1);
+    _x(i+_first,2) = _buf(i,2);
+    _v(i+_first,0) = _buf(i,3);
+    _v(i+_first,1) = _buf(i,4);
+    _v(i+_first,2) = _buf(i,5);
+  }
+};
+
+/* ----------------------------------------------------------------------
+   unpack x and v of n atoms, starting at atom index first, from buf
+   uses the LMPHostType functor if commKK->forward_comm_on_host is set,
+   the LMPDeviceType functor otherwise; x and v are synced to and flagged
+------------------------------------------------------------------------- */
+
+void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
+    const DAT::tdual_xfloat_2d &buf ) {
+  if(commKK->forward_comm_on_host) {
+    sync(Host,X_MASK|V_MASK);
+    modified(Host,X_MASK|V_MASK);
+    struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
+    Kokkos::parallel_for(n,f);
+  } else {
+    sync(Device,X_MASK|V_MASK);
+    modified(Device,X_MASK|V_MASK);
+    struct AtomVecKokkos_UnpackCommVel<LMPDeviceType> f(atomKK->k_x,atomKK->k_v,buf,first);
+    Kokkos::parallel_for(n,f);
+  }
+}
+
 /* ---------------------------------------------------------------------- */
 
 int AtomVecKokkos::pack_comm(int n, int *list, double *buf,
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index 38ffed9d374b47143d93c39e5d98046c1298cadb..e1d626ac1b0cfb8e3efbcfcffcde56754e8e0c8d 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -63,11 +63,11 @@ class AtomVecKokkos : public AtomVec {
   virtual int
     pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &list,
                          const int & iswap, const DAT::tdual_xfloat_2d &buf,
-                         const int &pbc_flag, const int pbc[]) { return 0; }
+                         const int &pbc_flag, const int pbc[]);
 
   virtual void
     unpack_comm_vel_kokkos(const int &n, const int &nfirst,
-                           const DAT::tdual_xfloat_2d &buf) {}
+                           const DAT::tdual_xfloat_2d &buf);
 
   virtual int
     unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,