diff --git a/doc/src/Commands_fix.txt b/doc/src/Commands_fix.txt index 3c17fb70568b898fa76618c9f6ccccbc043e8cc8..58828d5c9edb0da368a86018cf6f1b3e6087453d 100644 --- a/doc/src/Commands_fix.txt +++ b/doc/src/Commands_fix.txt @@ -68,11 +68,11 @@ OPT. "ffl"_fix_ffl.html, "filter/corotate"_fix_filter_corotate.html, "flow/gauss"_fix_flow_gauss.html, -"freeze"_fix_freeze.html, +"freeze (k)"_fix_freeze.html, "gcmc"_fix_gcmc.html, "gld"_fix_gld.html, "gle"_fix_gle.html, -"gravity (o)"_fix_gravity.html, +"gravity (ko)"_fix_gravity.html, "grem"_fix_grem.html, "halt"_fix_halt.html, "heat"_fix_heat.html, @@ -105,7 +105,7 @@ OPT. "nph/asphere (o)"_fix_nph_asphere.html, "nph/body"_fix_nph_body.html, "nph/eff"_fix_nh_eff.html, -"nph/sphere (o)"_fix_nph_sphere.html, +"nph/sphere (ko)"_fix_nph_sphere.html, "nphug (o)"_fix_nphug.html, "npt (kio)"_fix_nh.html, "npt/asphere (o)"_fix_npt_asphere.html, diff --git a/doc/src/Commands_pair.txt b/doc/src/Commands_pair.txt index 5715c0123b278b5b34d6374abbc651caa99b3c54..090101d5eac69e7e4f7247dbd55fab6b1524e7bd 100644 --- a/doc/src/Commands_pair.txt +++ b/doc/src/Commands_pair.txt @@ -95,7 +95,7 @@ OPT. "gayberne (gio)"_pair_gayberne.html, "gran/hertz/history (o)"_pair_gran.html, "gran/hooke (o)"_pair_gran.html, -"gran/hooke/history (o)"_pair_gran.html, +"gran/hooke/history (ko)"_pair_gran.html, "gw"_pair_gw.html, "gw/zbl"_pair_gw.html, "hbond/dreiding/lj (o)"_pair_hbond_dreiding.html, diff --git a/doc/src/fix_freeze.txt b/doc/src/fix_freeze.txt index 89f2dd8179f9f6ab02cf054c24f790e9c10e7662..9e085d8b1dc68470b6d991b1ca430f055cb08d83 100644 --- a/doc/src/fix_freeze.txt +++ b/doc/src/fix_freeze.txt @@ -7,6 +7,7 @@ :line fix freeze command :h3 +fix freeze/kk command :h3 [Syntax:] diff --git a/doc/src/fix_gravity.txt b/doc/src/fix_gravity.txt index ea596c866867e89f3c4b6b8a598ead8662976e57..c529a04d34068940256e38bda3948cd796fcbe17 100644 --- a/doc/src/fix_gravity.txt +++ b/doc/src/fix_gravity.txt @@ -8,6 +8,7 @@ fix gravity command :h3 fix gravity/omp command :h3 +fix gravity/kk command :h3 [Syntax:] diff --git a/doc/src/fix_nve_sphere.txt b/doc/src/fix_nve_sphere.txt index 818940a9bd1ebda7b50c44120a209aaa4ef4e270..225359dfa7f03b56e6e0636eca55f70384853e19 100644 --- a/doc/src/fix_nve_sphere.txt +++ b/doc/src/fix_nve_sphere.txt @@ -8,6 +8,7 @@ fix nve/sphere command :h3 fix nve/sphere/omp command :h3 +fix nve/sphere/kk command :h3 [Syntax:] diff --git a/doc/src/pair_gran.txt b/doc/src/pair_gran.txt index 5beaf30e06edb9cd288ce9294b1b31eac664d110..e322f8da15b54e39bf9abe3702a6a3d77e130202 100644 --- a/doc/src/pair_gran.txt +++ b/doc/src/pair_gran.txt @@ -10,6 +10,7 @@ pair_style gran/hooke command :h3 pair_style gran/hooke/omp command :h3 pair_style gran/hooke/history command :h3 pair_style gran/hooke/history/omp command :h3 +pair_style gran/hooke/history/kk command :h3 pair_style gran/hertz/history command :h3 pair_style gran/hertz/history/omp command :h3 diff --git a/src/GRANULAR/fix_freeze.h b/src/GRANULAR/fix_freeze.h index 52badc3e1bed7a2aa35c0fdcf61c0673d3292f16..a249226aa7840d335924088919ea765af7996776 100644 --- a/src/GRANULAR/fix_freeze.h +++ b/src/GRANULAR/fix_freeze.h @@ -34,7 +34,7 @@ class FixFreeze : public Fix { void post_force_respa(int, int, int); double compute_vector(int); - private: + protected: int force_flag; double foriginal[3],foriginal_all[3]; }; diff --git a/src/GRANULAR/fix_pour.cpp b/src/GRANULAR/fix_pour.cpp index 318c54c99a8844bea5f3f86d92c9ebd67f23abd3..3ffca8db9d77d304121c12894195f1f7013cd826 100644 --- a/src/GRANULAR/fix_pour.cpp +++ b/src/GRANULAR/fix_pour.cpp @@ -53,6 +53,8 @@ FixPour::FixPour(LAMMPS *lmp, int narg, char **arg) : { if (narg < 6) error->all(FLERR,"Illegal fix pour command"); + if (lmp->kokkos) error->all(FLERR,"Cannot yet use fix pour with the KOKKOS package"); + time_depend = 1; if (!atom->radius_flag || !atom->rmass_flag) @@ -181,6 +183,7 @@ FixPour::FixPour(LAMMPS *lmp, int narg, char **arg) : for (ifix = 0; ifix < modify->nfix; ifix++) { if (strcmp(modify->fix[ifix]->style,"gravity") == 0) break; if (strcmp(modify->fix[ifix]->style,"gravity/omp") == 0) break; + if (strstr(modify->fix[ifix]->style,"gravity/kk") != NULL) break; } if (ifix == modify->nfix) error->all(FLERR,"No fix gravity defined for fix pour"); @@ -315,6 +318,7 @@ void FixPour::init() for (ifix = 0; ifix < modify->nfix; ifix++) { if (strcmp(modify->fix[ifix]->style,"gravity") == 0) break; if (strcmp(modify->fix[ifix]->style,"gravity/omp") == 0) break; + if (strstr(modify->fix[ifix]->style,"gravity/kk") != NULL) break; } if (ifix == modify->nfix) error->all(FLERR,"No fix gravity defined for fix pour"); diff --git a/src/GRANULAR/fix_pour.h b/src/GRANULAR/fix_pour.h index c65fa6a18f5b1257661883483cb6e9410eb38954..63d0d39152964677d199312a748a3669cc1e7867 100644 --- a/src/GRANULAR/fix_pour.h +++ b/src/GRANULAR/fix_pour.h @@ -89,6 +89,10 @@ Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. +E: Cannot yet use fix pour with the KOKKOS package + +This feature is not yet supported. + E: Fix pour requires atom attributes radius, rmass The atom style defined does not have these attributes. diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp index c014510fca2d36d9575c32600f64c27f3a426747..83f75c221dc0c9497407884a47594d5a3866b107 100644 --- a/src/GRANULAR/pair_gran_hooke_history.cpp +++ b/src/GRANULAR/pair_gran_hooke_history.cpp @@ -63,6 +63,8 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp) PairGranHookeHistory::~PairGranHookeHistory() { + if (copymode) return; + delete [] svector; if (fix_history) modify->delete_fix("NEIGH_HISTORY"); diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 08c7468a4971a0fb1f00d8071f231b30d3e1e2c8..321fa34ad7657cb2eda951b32df114ef427091b9 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -71,6 +71,8 @@ action atom_vec_kokkos.cpp action atom_vec_kokkos.h action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp action atom_vec_molecular_kokkos.h atom_vec_molecular.h +action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp +action atom_vec_sphere_kokkos.h atom_vec_sphere.h action bond_class2_kokkos.cpp bond_class2.cpp action bond_class2_kokkos.h bond_class2.h action bond_fene_kokkos.cpp bond_fene.cpp @@ -97,8 +99,14 @@ action fix_enforce2d_kokkos.cpp action fix_enforce2d_kokkos.h action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h +action fix_freeze_kokkos.cpp fix_freeze.cpp +action fix_freeze_kokkos.h fix_freeze.h +action fix_gravity_kokkos.cpp +action fix_gravity_kokkos.h action fix_langevin_kokkos.cpp action fix_langevin_kokkos.h +action fix_neigh_history_kokkos.cpp +action fix_neigh_history_kokkos.h action fix_nh_kokkos.cpp action fix_nh_kokkos.h action fix_nph_kokkos.cpp @@ -107,6 +115,8 @@ action fix_npt_kokkos.cpp action fix_npt_kokkos.h action fix_nve_kokkos.cpp action fix_nve_kokkos.h +action fix_nve_sphere_kokkos.cpp +action fix_nve_sphere_kokkos.h action fix_nvt_kokkos.cpp action fix_nvt_kokkos.h action fix_property_atom_kokkos.cpp @@ -193,6 +203,8 @@ action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp action pair_eam_fs_kokkos.h pair_eam_fs.h action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp action pair_exp6_rx_kokkos.h pair_exp6_rx.h +action pair_gran_hooke_history_kokkos.h pair_gran_hooke_history.h +action pair_gran_hooke_history_kokkos.cpp pair_gran_hooke_history.cpp action pair_hybrid_kokkos.cpp action pair_hybrid_kokkos.h action pair_hybrid_overlay_kokkos.cpp diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 24b23c6ccf667f076c5fe6e51157a7fc4a555dc4..30db76e72346540fe7b897010dde370808541412 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -48,6 +48,8 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) k_count = DAT::tdual_int_1d("atom::k_count",1); atomKK = (AtomKokkos *) atom; commKK = (CommKokkos *) comm; + + no_comm_vel_flag = 1; } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 7e1cc200d36bec7d4c9571fb403ca8f4dbd0c242..ce36f590531167a429bd5e68a044eb576617213c 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -26,7 +26,10 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) {} +AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) { + no_comm_vel_flag = 1; + no_border_vel_flag = 1; +} /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index f2c04bec1b6281b3922513628b2a655e44c9baf7..83af437ebaaecb97a9af0ed8dedaee6842734638 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -26,6 +26,9 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) kokkosable = 1; buffer = NULL; buffer_size = 0; + + no_comm_vel_flag = 0; + no_border_vel_flag = 1; } /* ---------------------------------------------------------------------- */ @@ -305,6 +308,277 @@ void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first, } } + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP> +struct AtomVecKokkos_PackCommVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_v_array _v; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + X_FLOAT _h_rate[6]; + const int _deform_vremap; + + AtomVecKokkos_PackCommVel( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_int_1d &mask, + const typename DAT::tdual_v_array &v, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int &iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc, + const double * const h_rate, + const int &deform_vremap): + _x(x.view<DeviceType>()), + _mask(mask.view<DeviceType>()), + _v(v.view<DeviceType>()), + _list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz), + _deform_vremap(deform_vremap) + { + const size_t elements = 6; + const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; + _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + + if (DEFORM_VREMAP == 0) { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } else { + if (_mask(i) & _deform_vremap) { + _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + } else { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_vel_kokkos( + const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|V_MASK); + if (pbc_flag) { + if (deform_vremap) { + if (domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPHostType,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK|V_MASK); + if(pbc_flag) { + if(deform_vremap) { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if(domain->triclinic) { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_v, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + return n*6; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecKokkos_UnpackCommVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_v_array _v; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecKokkos_UnpackCommVel( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_v_array &v, + const typename DAT::tdual_xfloat_2d &buf, + const int& first): + _x(x.view<DeviceType>()), + _v(v.view<DeviceType>()), + _first(first) + { + const size_t elements = 6; + const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _v(i+_first,0) = _buf(i,3); + _v(i+_first,1) = _buf(i,4); + _v(i+_first,2) = _buf(i,5); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|V_MASK); + modified(Host,X_MASK|V_MASK); + struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,X_MASK|V_MASK); + modified(Device,X_MASK|V_MASK); + struct AtomVecKokkos_UnpackCommVel<LMPDeviceType> f(atomKK->k_x,atomKK->k_v,buf,first); + Kokkos::parallel_for(n,f); + } +} + /* ---------------------------------------------------------------------- */ int AtomVecKokkos::pack_comm(int n, int *list, double *buf, diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index b68fdc9b209dcefa6e8ee36c21053a396bc73396..efe55c47ad00215fc94f5fe8b3c231bd85d5de38 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -60,6 +60,15 @@ class AtomVecKokkos : public AtomVec { unpack_comm_kokkos(const int &n, const int &nfirst, const DAT::tdual_xfloat_2d &buf); + virtual int + pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + + virtual void + unpack_comm_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + virtual int unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst); @@ -82,6 +91,16 @@ class AtomVecKokkos : public AtomVec { const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) = 0; + virtual int + pack_border_vel_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) { return 0; } + + virtual void + unpack_border_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space) {} + virtual int pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf, DAT::tdual_int_1d k_sendlist, @@ -94,6 +113,8 @@ class AtomVecKokkos : public AtomVec { ExecutionSpace space) = 0; + int no_comm_vel_flag,no_border_vel_flag; + protected: HAT::t_x_array h_x; diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3dfbc5efdca0f6fe2c9ba87600396971439684d6 --- /dev/null +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -0,0 +1,2903 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include <cmath> +#include <cstdlib> +#include <cstring> +#include "atom_vec_sphere_kokkos.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "force.h" +#include "fix.h" +#include "fix_adapt.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "memory_kokkos.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +static const double MY_PI = 3.14159265358979323846; // pi + +/* ---------------------------------------------------------------------- */ + +AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + + comm_x_only = 1; + comm_f_only = 0; + size_forward = 3; + size_reverse = 6; + size_border = 8; + size_velocity = 6; + size_data_atom = 7; + size_data_vel = 7; + xcol_data = 5; + + atom->sphere_flag = 1; + atom->radius_flag = atom->rmass_flag = atom->omega_flag = + atom->torque_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; + + no_border_vel_flag = 0; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::init() +{ + AtomVec::init(); + + // set radvary if particle diameters are time-varying due to fix adapt + + radvary = 0; + comm_x_only = 1; + size_forward = 3; + + for (int i = 0; i < modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"adapt") == 0) { + FixAdapt *fix = (FixAdapt *) modify->fix[i]; + if (fix->diamflag) { + radvary = 1; + comm_x_only = 0; + size_forward = 5; + } + } + } +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by a chunk + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atom->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_radius,atomKK->radius,nmax,"atom:radius"); + memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass"); + memoryKK->grow_kokkos(atomKK->k_omega,atomKK->omega,nmax,3,"atom:omega"); + memoryKK->grow_kokkos(atomKK->k_torque,atomKK->torque,nmax,3,"atom:torque"); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); + + grow_reset(); + sync(Host,ALL_MASK); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + radius = atomKK->radius; + d_radius = atomKK->k_radius.d_view; + h_radius = atomKK->k_radius.h_view; + rmass = atomKK->rmass; + d_rmass = atomKK->k_rmass.d_view; + h_rmass = atomKK->k_rmass.h_view; + omega = atomKK->omega; + d_omega = atomKK->k_omega.d_view; + h_omega = atomKK->k_omega.h_view; + torque = atomKK->torque; + d_torque = atomKK->k_torque.d_view; + h_torque = atomKK->k_torque.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::copy(int i, int j, int delflag) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | + RMASS_MASK | OMEGA_MASK); + + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + h_mask[j] = h_mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_radius[j] = h_radius[i]; + h_rmass[j] = h_rmass[i]; + h_omega(j,0) = h_omega(i,0); + h_omega(j,1) = h_omega(i,1); + h_omega(j,2) = h_omega(i,2); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); + + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | + RMASS_MASK | OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecSphereKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecSphereKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t elements = 5; + const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _buf(i,3) = _radius(j); + _buf(i,4) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_kokkos( + const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) + return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc); + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPHostType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPHostType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int RADVARY,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP> +struct AtomVecSphereKokkos_PackCommVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_v_array _v, _omega; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + X_FLOAT _h_rate[6]; + const int _deform_vremap; + + AtomVecSphereKokkos_PackCommVel( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_int_1d &mask, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_v_array &v, + const typename DAT::tdual_v_array &omega, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int &iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc, + const double * const h_rate, + const int &deform_vremap): + _x(x.view<DeviceType>()), + _mask(mask.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _v(v.view<DeviceType>()), + _omega(omega.view<DeviceType>()), + _list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz), + _deform_vremap(deform_vremap) + { + const size_t elements = 9 + 2 * RADVARY; + const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.view<DeviceType>().data(),maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + _h_rate[0] = h_rate[0]; _h_rate[1] = h_rate[1]; _h_rate[2] = h_rate[2]; + _h_rate[3] = h_rate[3]; _h_rate[4] = h_rate[4]; _h_rate[5] = h_rate[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + if (DEFORM_VREMAP == 0) { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } else { + if (_mask(i) & _deform_vremap) { + _buf(i,3) = _v(j,0) + _pbc[0]*_h_rate[0] + _pbc[5]*_h_rate[5] + _pbc[4]*_h_rate[4]; + _buf(i,4) = _v(j,1) + _pbc[1]*_h_rate[1] + _pbc[3]*_h_rate[3]; + _buf(i,5) = _v(j,2) + _pbc[2]*_h_rate[2]; + } else { + _buf(i,3) = _v(j,0); + _buf(i,4) = _v(j,1); + _buf(i,5) = _v(j,2); + } + } + _buf(i,6) = _omega(j,0); + _buf(i,7) = _omega(j,1); + _buf(i,8) = _omega(j,2); + if (RADVARY) { + _buf(i,9) = _radius(j); + _buf(i,10) = _rmass(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_vel_kokkos( + const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + if(pbc_flag) { + if(deform_vremap) { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + } else { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPHostType,1,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + if(pbc_flag) { + if(deform_vremap) { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,1,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,0,1> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } else { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,1,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + } else { + if(domain->triclinic) { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,0,1,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } else { + if (radvary == 0) { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,1,0,0,0> f( + atomKK->k_x,atomKK->k_mask, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap); + Kokkos::parallel_for(n,f); + } + } + } + } + return n*(size_forward+size_velocity); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecSphereKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecSphereKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _radius(i+_nfirst) = _radius(j); + _rmass(i+_nfirst) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_self( + const int &n, const DAT::tdual_int_2d &list, const int &iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) + return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc); + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf; + int _first; + + AtomVecSphereKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_xfloat_2d &buf, + const int& first): + _x(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _first(first) + { + const size_t elements = 5; + const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.view<DeviceType>().data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _radius(i+_first) = _buf(i,3); + _rmass(i+_first) = _buf(i,4); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm_kokkos( + const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) { + AtomVecKokkos::unpack_comm_kokkos(n,first,buf); + return; + } + if(commKK->forward_comm_on_host) { + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,first); + Kokkos::parallel_for(n,f); + } else { + modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int RADVARY> +struct AtomVecSphereKokkos_UnpackCommVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_v_array _v, _omega; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecSphereKokkos_UnpackCommVel( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_v_array &v, + const typename DAT::tdual_v_array &omega, + const typename DAT::tdual_xfloat_2d &buf, + const int& first): + _x(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _v(v.view<DeviceType>()), + _omega(omega.view<DeviceType>()), + _first(first) + { + const size_t elements = 9 + 2 * RADVARY; + const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _v(i+_first,0) = _buf(i,3); + _v(i+_first,1) = _buf(i,4); + _v(i+_first,2) = _buf(i,5); + _omega(i+_first,0) = _buf(i,6); + _omega(i+_first,1) = _buf(i,7); + _omega(i+_first,2) = _buf(i,8); + if (RADVARY) { + _radius(i+_first) = _buf(i,9); + _rmass(i+_first) = _buf(i,10); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm_vel_kokkos( + const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + if (radvary == 0) { + struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,first); + Kokkos::parallel_for(n,f); + } + } else { + modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + if (radvary == 0) { + struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + atomKK->k_v,atomKK->k_omega, + buf,first); + Kokkos::parallel_for(n,f); + } + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + if (radvary == 0) { + // Not sure if we need to call sync for X here + sync(Host,X_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + } else { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + if (radvary == 0) { + sync(Host,X_MASK|V_MASK|OMEGA_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + } else { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf) +{ + if (radvary == 0) return 0; + + sync(Host,RADIUS_MASK|RMASS_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) +{ + if (radvary == 0) { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } + modified(Host,X_MASK); + } else { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + if (radvary == 0) { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + modified(Host,X_MASK|V_MASK|OMEGA_MASK); + } else { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) +{ + if (radvary == 0) return 0; + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,RADIUS_MASK|RMASS_MASK); + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK|TORQUE_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + buf[m++] = h_torque(i,0); + buf[m++] = h_torque(i,1); + buf[m++] = h_torque(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,TORQUE_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_torque(i,0); + buf[m++] = h_torque(i,1); + buf[m++] = h_torque(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) { + modified(Host,F_MASK|TORQUE_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + h_torque(j,0) += buf[m++]; + h_torque(j,1) += buf[m++]; + h_torque(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf) +{ + if(n > 0) { + modified(Host,TORQUE_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_torque(j,0) += buf[m++]; + h_torque(j,1) += buf[m++]; + h_torque(j,2) += buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG> +struct AtomVecSphereKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + const typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + const typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + const typename ArrayTypes<DeviceType>::t_int_1d _type; + const typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + X_FLOAT _dx,_dy,_dz; + + AtomVecSphereKokkos_PackBorder( + const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf, + const typename ArrayTypes<DeviceType>::t_int_2d_const &list, + const int &iswap, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _dx(dx),_dy(dy),_dz(dz) + { + const size_t elements = 8; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + } + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _radius(j); + _buf(i,7) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_kokkos( + int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec + sync(space,ALL_MASK); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecSphereKokkos_PackBorder<LMPHostType,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecSphereKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } + return n*size_border; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border( + int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + sync(Host,ALL_MASK); + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int DEFORM_VREMAP> +struct AtomVecSphereKokkos_PackBorderVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + const typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + const typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + const typename ArrayTypes<DeviceType>::t_int_1d _type; + const typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_v_array _v, _omega; + X_FLOAT _dx,_dy,_dz, _dvx, _dvy, _dvz; + const int _deform_groupbit; + + AtomVecSphereKokkos_PackBorderVel( + const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf, + const typename ArrayTypes<DeviceType>::t_int_2d_const &list, + const int &iswap, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const typename ArrayTypes<DeviceType>::t_v_array &v, + const typename ArrayTypes<DeviceType>::t_v_array &omega, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz, + const X_FLOAT &dvx, const X_FLOAT &dvy, const X_FLOAT &dvz, + const int &deform_groupbit): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _v(v), _omega(omega), + _dx(dx),_dy(dy),_dz(dz), + _dvx(dvx),_dvy(dvy),_dvz(dvz), + _deform_groupbit(deform_groupbit) + { + const size_t elements = 14; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_um(buf.data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + } + _buf(i,3) = d_ubuf(_tag(j)).d; + _buf(i,4) = d_ubuf(_type(j)).d; + _buf(i,5) = d_ubuf(_mask(j)).d; + _buf(i,6) = _radius(j); + _buf(i,7) = _rmass(j); + if (DEFORM_VREMAP) { + if (_mask(i) & _deform_groupbit) { + _buf(i,8) = _v(j,0) + _dvx; + _buf(i,9) = _v(j,1) + _dvy; + _buf(i,10) = _v(j,2) + _dvz; + } + } + else { + _buf(i,8) = _v(j,0); + _buf(i,9) = _v(j,1); + _buf(i,10) = _v(j,2); + } + _buf(i,11) = _omega(j,0); + _buf(i,12) = _omega(j,1); + _buf(i,13) = _omega(j,2); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_vel_kokkos( + int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx=0,dy=0,dz=0; + X_FLOAT dvx=0,dvy=0,dvz=0; + + // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec + sync(space,ALL_MASK); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + if(space==Host) { + AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + h_v, h_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + d_v, d_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } + } + else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + if(space==Host) { + AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + h_v, h_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + d_v, d_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } + } + } else { + if(space==Host) { + AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + h_v, h_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,0,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + d_v, d_omega, + dx,dy,dz,dvx,dvy,dvz, + deform_groupbit); + Kokkos::parallel_for(n,f); + } + } + + return n*(size_border + size_velocity); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + sync(Host,ALL_MASK); + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + sync(Host,RADIUS_MASK|RMASS_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackBorder { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf; + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + typename ArrayTypes<DeviceType>::t_int_1d _type; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + int _first; + + AtomVecSphereKokkos_UnpackBorder( + const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _first(first) + { + const size_t elements = 8; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast<tagint> (d_ubuf(_buf(i,3)).i); + _type(i+_first) = static_cast<int> (d_ubuf(_buf(i,4)).i); + _mask(i+_first) = static_cast<int> (d_ubuf(_buf(i,5)).i); + _radius(i+_first) = _buf(i,6); + _rmass(i+_first) = _buf(i,7); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + while (first+n >= nmax) grow(0); + if(space==Host) { + struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(), + h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(), + d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + first); + Kokkos::parallel_for(n,f); + } + + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + RADIUS_MASK|RMASS_MASK); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); +} + + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackBorderVel { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um _buf; + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + typename ArrayTypes<DeviceType>::t_int_1d _type; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_v_array _v; + typename ArrayTypes<DeviceType>::t_v_array _omega; + int _first; + + AtomVecSphereKokkos_UnpackBorderVel( + const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const typename ArrayTypes<DeviceType>::t_v_array &v, + const typename ArrayTypes<DeviceType>::t_v_array &omega, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _v(v), _omega(omega), + _first(first) + { + const size_t elements = 14; + const int maxsend = (buf.extent(0)*buf.extent(1))/elements; + _buf = typename ArrayTypes<DeviceType>::t_xfloat_2d_const_um(buf.data(),maxsend,elements); + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast<tagint> (d_ubuf(_buf(i,3)).i); + _type(i+_first) = static_cast<int> (d_ubuf(_buf(i,4)).i); + _mask(i+_first) = static_cast<int> (d_ubuf(_buf(i,5)).i); + _radius(i+_first) = _buf(i,6); + _rmass(i+_first) = _buf(i,7); + _v(i+_first,0) = _buf(i,8); + _v(i+_first,1) = _buf(i,9); + _v(i+_first,2) = _buf(i,10); + _omega(i+_first,0) = _buf(i,11); + _omega(i+_first,1) = _buf(i,12); + _omega(i+_first,2) = _buf(i,13); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border_vel_kokkos( + const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + while (first+n >= nmax) grow(0); + if(space==Host) { + struct AtomVecSphereKokkos_UnpackBorderVel<LMPHostType> f(buf.view<LMPHostType>(), + h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + h_v, h_omega, + first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_UnpackBorderVel<LMPDeviceType> f(buf.view<LMPDeviceType>(), + d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + d_v, d_omega, + first); + Kokkos::parallel_for(n,f); + } + + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,RADIUS_MASK|RMASS_MASK); + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_float_1d_randomread _radius,_rmass; + typename AT::t_v_array_randomread _omega; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_float_1d _radiusw,_rmassw; + typename AT::t_v_array _omegaw; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecSphereKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _radius(atom->k_radius.view<DeviceType>()), + _rmass(atom->k_rmass.view<DeviceType>()), + _omega(atom->k_omega.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _radiusw(atom->k_radius.view<DeviceType>()), + _rmassw(atom->k_rmass.view<DeviceType>()), + _omegaw(atom->k_omega.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi) + { + const size_t elements = 16; + const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + + _buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 16; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = d_ubuf(_tag[i]).d; + _buf(mysend,8) = d_ubuf(_type[i]).d; + _buf(mysend,9) = d_ubuf(_mask[i]).d; + _buf(mysend,10) = d_ubuf(_image[i]).d; + _buf(mysend,11) = _radius[i]; + _buf(mysend,12) = _rmass[i]; + _buf(mysend,13) = _omega(i,0); + _buf(mysend,14) = _omega(i,1); + _buf(mysend,15) = _omega(i,2); + const int j = _copylist(mysend); + + if (j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw[i] = _tag(j); + _typew[i] = _type(j); + _maskw[i] = _mask(j); + _imagew[i] = _image(j); + _radiusw[i] = _radius(j); + _rmassw[i] = _rmass(j); + _omegaw(i,0) = _omega(j,0); + _omegaw(i,1) = _omega(j,1); + _omegaw(i,2) = _omega(j,2); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_exchange_kokkos( + const int &nsend, + DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi) +{ + if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) { + int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1)); + } + sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + if(space == Host) { + AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + } + return nsend*16; +} + +/* ---------------------------------------------------------------------- + pack data for atom I for sending to another proc + xyz must be 1st 3 values, so comm::exchange() can test on them +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_exchange(int i, double *buf) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag[i]).d; + buf[m++] = ubuf(h_type[i]).d; + buf[m++] = ubuf(h_mask[i]).d; + buf[m++] = ubuf(h_image[i]).d; + + buf[m++] = h_radius[i]; + buf[m++] = h_rmass[i]; + buf[m++] = h_omega(i,0); + buf[m++] = h_omega(i,1); + buf[m++] = h_omega(i,2); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_float_1d _radius; + typename AT::t_float_1d _rmass; + typename AT::t_v_array _omega; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecSphereKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _radius(atom->k_radius.view<DeviceType>()), + _rmass(atom->k_rmass.view<DeviceType>()), + _omega(atom->k_omega.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi) + { + const size_t elements = 16; + const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = (tagint) d_ubuf(_buf(myrecv,7)).i; + _type[i] = (int) d_ubuf(_buf(myrecv,8)).i; + _mask[i] = (int) d_ubuf(_buf(myrecv,9)).i; + _image[i] = (imageint) d_ubuf(_buf(myrecv,10)).i; + _radius[i] = _buf(myrecv,11); + _rmass[i] = _buf(myrecv,12); + _omega(i,0) = _buf(myrecv,13); + _omega(i,1) = _buf(myrecv,14); + _omega(i,2) = _buf(myrecv,15); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } else { + k_count.h_view(0) = nlocal; + k_count.modify<LMPHostType>(); + k_count.sync<LMPDeviceType>(); + AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + k_count.modify<LMPDeviceType>(); + k_count.sync<LMPHostType>(); + } + + modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + return k_count.h_view(0); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag[nlocal] = (tagint) ubuf(buf[m++]).i; + h_type[nlocal] = (int) ubuf(buf[m++]).i; + h_mask[nlocal] = (int) ubuf(buf[m++]).i; + h_image[nlocal] = (imageint) ubuf(buf[m++]).i; + + h_radius[nlocal] = buf[m++]; + h_rmass[nlocal] = buf[m++]; + h_omega(nlocal,0) = buf[m++]; + h_omega(nlocal,1) = buf[m++]; + h_omega(nlocal,2) = buf[m++]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 16 * nlocal; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_restart(int i, double *buf) +{ + sync(Host,X_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | V_MASK | + RADIUS_MASK | RMASS_MASK | OMEGA_MASK); + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag[i]).d; + buf[m++] = ubuf(h_type[i]).d; + buf[m++] = ubuf(h_mask[i]).d; + buf[m++] = ubuf(h_image[i]).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = h_radius[i]; + buf[m++] = h_rmass[i]; + buf[m++] = h_omega(i,0); + buf[m++] = h_omega(i,1); + buf[m++] = h_omega(i,2); + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag[nlocal] = (tagint) ubuf(buf[m++]).i; + h_type[nlocal] = (int) ubuf(buf[m++]).i; + h_mask[nlocal] = (int) ubuf(buf[m++]).i; + h_image[nlocal] = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_radius[nlocal] = buf[m++]; + h_rmass[nlocal] = buf[m++]; + h_omega(nlocal,0) = buf[m++]; + h_omega(nlocal,1) = buf[m++]; + h_omega(nlocal,2) = buf[m++]; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast<int> (buf[0]) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + modified(Host,X_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | V_MASK | + RADIUS_MASK | RMASS_MASK | OMEGA_MASK); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + } + + h_tag[nlocal] = 0; + h_type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask[nlocal] = 1; + h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_radius[nlocal] = 0.5; + h_rmass[nlocal] = 4.0*MY_PI/3.0 * h_radius[nlocal]*h_radius[nlocal]*h_radius[nlocal]; + h_omega(nlocal,0) = 0.0; + h_omega(nlocal,1) = 0.0; + h_omega(nlocal,2) = 0.0; + + atomKK->modified(Host,ALL_MASK); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::data_atom(double *coord, imageint imagetmp, char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + tag[nlocal] = ATOTAGINT(values[0]); + type[nlocal] = atoi(values[1]); + if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + radius[nlocal] = 0.5 * atof(values[2]); + if (radius[nlocal] < 0.0) + error->one(FLERR,"Invalid radius in Atoms section of data file"); + + double density = atof(values[3]); + if (density <= 0.0) + error->one(FLERR,"Invalid density in Atoms section of data file"); + + if (radius[nlocal] == 0.0) rmass[nlocal] = density; + else + rmass[nlocal] = 4.0*MY_PI/3.0 * + radius[nlocal]*radius[nlocal]*radius[nlocal] * density; + + x[nlocal][0] = coord[0]; + x[nlocal][1] = coord[1]; + x[nlocal][2] = coord[2]; + + image[nlocal] = imagetmp; + + mask[nlocal] = 1; + v[nlocal][0] = 0.0; + v[nlocal][1] = 0.0; + v[nlocal][2] = 0.0; + omega[nlocal][0] = 0.0; + omega[nlocal][1] = 0.0; + omega[nlocal][2] = 0.0; + + atomKK->modified(Host,ALL_MASK); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values) +{ + radius[nlocal] = 0.5 * atof(values[0]); + if (radius[nlocal] < 0.0) + error->one(FLERR,"Invalid radius in Atoms section of data file"); + + double density = atof(values[1]); + if (density <= 0.0) + error->one(FLERR,"Invalid density in Atoms section of data file"); + + if (radius[nlocal] == 0.0) rmass[nlocal] = density; + else + rmass[nlocal] = 4.0*MY_PI/3.0 * + radius[nlocal]*radius[nlocal]*radius[nlocal] * density; + + + atomKK->modified(Host,RADIUS_MASK|RMASS_MASK); + + return 2; +} + +/* ---------------------------------------------------------------------- + unpack one line from Velocities section of data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::data_vel(int m, char **values) +{ + sync(Host,V_MASK|OMEGA_MASK); + h_v(m,0) = atof(values[0]); + h_v(m,1) = atof(values[1]); + h_v(m,2) = atof(values[2]); + h_omega(m,0) = atof(values[3]); + h_omega(m,1) = atof(values[4]); + h_omega(m,2) = atof(values[5]); + modified(Host,V_MASK|OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Velocities section of data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values) +{ + sync(Host,OMEGA_MASK); + omega[m][0] = atof(values[0]); + omega[m][1] = atof(values[1]); + omega[m][2] = atof(values[2]); + modified(Host,OMEGA_MASK); + return 3; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::pack_data(double **buf) +{ + atomKK->sync(Host,TAG_MASK|TYPE_MASK|RADIUS_MASK|RMASS_MASK|X_MASK|IMAGE_MASK); + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = ubuf(h_type[i]).d; + buf[i][2] = 2.0*h_radius[i]; + if (h_radius[i] == 0.0) buf[i][3] = h_rmass[i]; + else + buf[i][3] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]); + buf[i][4] = h_x(i,0); + buf[i][5] = h_x(i,1); + buf[i][6] = h_x(i,2); + buf[i][7] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d; + buf[i][8] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d; + buf[i][9] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_data_hybrid(int i, double *buf) +{ + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); + + buf[0] = 2.0*h_radius[i]; + if (h_radius[i] == 0.0) buf[1] = h_rmass[i]; + else buf[1] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]); + return 2; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT + " %d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i, + buf[i][2],buf[i][3], + buf[i][4],buf[i][5],buf[i][6], + (int) ubuf(buf[i][7]).i,(int) ubuf(buf[i][8]).i, + (int) ubuf(buf[i][9]).i); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e %-1.16e",buf[0],buf[1]); + return 2; +} + +/* ---------------------------------------------------------------------- + pack velocity info for data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::pack_vel(double **buf) +{ + sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = h_v(i,0); + buf[i][2] = h_v(i,1); + buf[i][3] = h_v(i,2); + buf[i][4] = h_omega(i,0); + buf[i][5] = h_omega(i,1); + buf[i][6] = h_omega(i,2); + } +} + +/* ---------------------------------------------------------------------- + pack hybrid velocity info for data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf) +{ + sync(Host,OMEGA_MASK); + + buf[0] = h_omega(i,0); + buf[1] = h_omega(i,1); + buf[2] = h_omega(i,2); + return 3; +} + +/* ---------------------------------------------------------------------- + write velocity info to data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::write_vel(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT + " %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e\n", + (tagint) ubuf(buf[i][0]).i,buf[i][1],buf[i][2],buf[i][3], + buf[i][4],buf[i][5],buf[i][6]); +} + +/* ---------------------------------------------------------------------- + write hybrid velocity info to data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::write_vel_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2]); + return 3; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecSphereKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*comm->nthreads,3); + + if (atom->memcheck("radius")) bytes += memory->usage(radius,nmax); + if (atom->memcheck("rmass")) bytes += memory->usage(rmass,nmax); + if (atom->memcheck("omega")) bytes += memory->usage(omega,nmax,3); + if (atom->memcheck("torque")) + bytes += memory->usage(torque,nmax*comm->nthreads,3); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>(); + if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>(); + if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>(); + if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>(); + if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>(); + if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>(); + if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPDeviceType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPDeviceType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPDeviceType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPDeviceType>(); + } else { + if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>(); + if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>(); + if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>(); + if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>(); + if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>(); + if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>(); + if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPHostType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPHostType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPHostType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPHostType>(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space); + if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space); + if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space); + if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space); + if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>(); + if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>(); + if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>(); + if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>(); + if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>(); + if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>(); + if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPDeviceType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPDeviceType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPDeviceType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPDeviceType>(); + } else { + if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>(); + if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>(); + if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>(); + if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>(); + if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>(); + if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>(); + if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPHostType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPHostType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPHostType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPHostType>(); + } +} diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..28c8a3c8f6ef039b959855ee59306066bb15ea36 --- /dev/null +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -0,0 +1,169 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(sphere/kk,AtomVecSphereKokkos) +AtomStyle(sphere/kk/device,AtomVecSphereKokkos) +AtomStyle(sphere/kk/host,AtomVecSphereKokkos) + +#else + +#ifndef LMP_ATOM_VEC_SPHERE_KOKKOS_H +#define LMP_ATOM_VEC_SPHERE_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecSphereKokkos : public AtomVecKokkos { + public: + AtomVecSphereKokkos(class LAMMPS *); + ~AtomVecSphereKokkos() {} + void init(); + void grow(int); + void grow_reset(); + void copy(int, int, int); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + int pack_comm_hybrid(int, int *, double *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int unpack_comm_hybrid(int, int, double *); + int pack_reverse(int, int, double *); + int pack_reverse_hybrid(int, int, double *); + void unpack_reverse(int, int *, double *); + int unpack_reverse_hybrid(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, imageint, char **); + int data_atom_hybrid(int, char **); + void data_vel(int, char **); + int data_vel_hybrid(int, char **); + void pack_data(double **); + int pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + void pack_vel(double **); + int pack_vel_hybrid(int, double *); + void write_vel(FILE *, int, double **); + int write_vel_hybrid(FILE *, double *); + bigint memory_usage(); + + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int &pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_border_vel_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_vel_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + + private: + tagint *tag; + int *type,*mask; + imageint *image; + double **x,**v,**f; + double *radius,*rmass; + double **omega,**torque; + int radvary; + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + DAT::t_float_1d d_radius; + HAT::t_float_1d h_radius; + DAT::t_float_1d d_rmass; + HAT::t_float_1d h_rmass; + DAT::t_v_array d_omega; + HAT::t_v_array h_omega; + DAT::t_f_array d_torque; + HAT::t_f_array h_torque; + + DAT::tdual_int_1d k_count; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. + +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. + +E: Invalid radius in Atoms section of data file + +Radius must be >= 0.0. + +E: Invalid density in Atoms section of data file + +Density value cannot be <= 0.0. + +*/ diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 21840b7c3ee8fd396b90e9983cd57c2d49ef64aa..f40156aabcec16b85fe396b70e54c08164a3921f 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -134,11 +134,11 @@ void CommKokkos::init() if (force->newton == 0) check_reverse = 0; if (force->pair) check_reverse += force->pair->comm_reverse_off; - if (ghost_velocity) - forward_comm_classic = true; - if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet reverse_comm_classic = true; + + if (ghost_velocity && ((AtomVecKokkos*)atom->avec)->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet + forward_comm_classic = true; } /* ---------------------------------------------------------------------- @@ -191,10 +191,10 @@ void CommKokkos::forward_comm_device(int dummy) if (sendproc[iswap] != me) { if (comm_x_only) { if (size_forward_recv[iswap]) { - buf = atomKK->k_x.view<DeviceType>().data() + - firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1); - MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); + buf = atomKK->k_x.view<DeviceType>().data() + + firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1); + MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); @@ -210,17 +210,21 @@ void CommKokkos::forward_comm_device(int dummy) space,X_MASK); } } else if (ghost_velocity) { - error->all(FLERR,"Ghost velocity forward comm not yet " - "implemented with Kokkos"); - if (size_forward_recv[iswap]) - MPI_Irecv(k_buf_recv.view<LMPHostType>().data(), + if (size_forward_recv[iswap]) { + MPI_Irecv(k_buf_recv.view<DeviceType>().data(), size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); + } + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) { + MPI_Send(k_buf_send.view<DeviceType>().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + } if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view<DeviceType>().data(), @@ -236,18 +240,18 @@ void CommKokkos::forward_comm_device(int dummy) avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); DeviceType::fence(); } - } else { if (!ghost_velocity) { if (sendnum[iswap]) n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - } else if (ghost_velocity) { - error->all(FLERR,"Ghost velocity forward comm not yet " - "implemented with Kokkos"); - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); + DeviceType::fence(); + } else { + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); + DeviceType::fence(); } } } @@ -631,10 +635,9 @@ void CommKokkos::exchange_device() nsend = avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice<DeviceType>:: - space,dim,lo,hi); + ExecutionSpaceFromDevice<DeviceType>::space, + dim,lo,hi); DeviceType::fence(); - } else { while (i < nlocal) { if (x[i][dim] < lo || x[i][dim] >= hi) { @@ -725,7 +728,8 @@ void CommKokkos::borders() if (!exchange_comm_classic) { static int print = 1; - if (style != Comm::SINGLE || bordergroup || ghost_velocity) { + if (mode != Comm::SINGLE || bordergroup || + (ghost_velocity && ((AtomVecKokkos*)atom->avec)->no_border_vel_flag)) { if (print && comm->me==0) { error->warning(FLERR,"Required border comm not yet implemented in Kokkos communication, " "switching to classic communication"); @@ -833,7 +837,7 @@ void CommKokkos::borders_device() { // store sent atom indices in list for use in future timesteps x = atom->x; - if (style == Comm::SINGLE) { + if (mode == Comm::SINGLE) { lo = slablo[iswap]; hi = slabhi[iswap]; } else { @@ -862,7 +866,7 @@ void CommKokkos::borders_device() { if (sendflag) { if (!bordergroup || ineed >= 2) { - if (style == Comm::SINGLE) { + if (mode == Comm::SINGLE) { k_total_send.h_view() = 0; k_total_send.template modify<LMPHostType>(); k_total_send.template sync<LMPDeviceType>(); @@ -910,7 +914,7 @@ void CommKokkos::borders_device() { } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); - if (style == Comm::SINGLE) { + if (mode == Comm::SINGLE) { ngroup = atom->nfirst; for (i = 0; i < ngroup; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { @@ -947,10 +951,10 @@ void CommKokkos::borders_device() { if (nsend*size_border > maxsend) grow_send_kokkos(nsend*size_border,0); if (ghost_velocity) { - error->all(FLERR,"Required border comm not yet " - "implemented with Kokkos"); - n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send, - pbc_flag[iswap],pbc[iswap]); + n = avec-> + pack_border_vel_kokkos(nsend,k_sendlist,k_buf_send,iswap, + pbc_flag[iswap],pbc[iswap],exec_space); + DeviceType::fence(); } else { n = avec-> @@ -983,11 +987,16 @@ void CommKokkos::borders_device() { // unpack buffer if (ghost_velocity) { - error->all(FLERR,"Required border comm not yet " - "implemented with Kokkos"); - avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); - } - else + if (sendproc[iswap] != me) { + avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, + k_buf_recv,exec_space); + DeviceType::fence(); + } else { + avec->unpack_border_vel_kokkos(nrecv,atom->nlocal+atom->nghost, + k_buf_send,exec_space); + DeviceType::fence(); + } + } else { if (sendproc[iswap] != me) { avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost, k_buf_recv,exec_space); @@ -997,7 +1006,7 @@ void CommKokkos::borders_device() { k_buf_send,exec_space); DeviceType::fence(); } - + } // set all pointers & counters smax = MAX(smax,nsend); @@ -1064,12 +1073,22 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) else k_buf_send.modify<LMPHostType>(); - k_buf_send.resize(maxsend_border,atom->avec->size_border); + if (ghost_velocity) + k_buf_send.resize(maxsend_border, + atom->avec->size_border + atom->avec->size_velocity); + else + k_buf_send.resize(maxsend_border,atom->avec->size_border); buf_send = k_buf_send.view<LMPHostType>().data(); } else { - k_buf_send = DAT:: - tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); + if (ghost_velocity) + k_buf_send = DAT:: + tdual_xfloat_2d("comm:k_buf_send", + maxsend_border, + atom->avec->size_border + atom->avec->size_velocity); + else + k_buf_send = DAT:: + tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); buf_send = k_buf_send.view<LMPHostType>().data(); } } @@ -1115,7 +1134,7 @@ void CommKokkos::grow_swap(int n) { free_swap(); allocate_swap(n); - if (style == Comm::MULTI) { + if (mode == Comm::MULTI) { free_multi(); allocate_multi(n); } diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index fec3d89fffa8169e975373c3f20f258af3ee0b47..d94c6e76b51df3d2187d52db23be408c6dc07635 100644 --- a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -49,8 +49,10 @@ double ComputeTempKokkos<DeviceType>::compute_scalar() invoked_scalar = update->ntimestep; v = atomKK->k_v.view<DeviceType>(); - rmass = atomKK->rmass; - mass = atomKK->k_mass.view<DeviceType>(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view<DeviceType>(); + else + mass = atomKK->k_mass.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mask = atomKK->k_mask.view<DeviceType>(); int nlocal = atom->nlocal; @@ -59,7 +61,7 @@ double ComputeTempKokkos<DeviceType>::compute_scalar() CTEMP t_kk; copymode = 1; - if (rmass) + if (atomKK->rmass) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk); @@ -102,8 +104,10 @@ void ComputeTempKokkos<DeviceType>::compute_vector() invoked_vector = update->ntimestep; v = atomKK->k_v.view<DeviceType>(); - rmass = atomKK->rmass; - mass = atomKK->k_mass.view<DeviceType>(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view<DeviceType>(); + else + mass = atomKK->k_mass.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mask = atomKK->k_mask.view<DeviceType>(); int nlocal = atom->nlocal; @@ -113,7 +117,7 @@ void ComputeTempKokkos<DeviceType>::compute_vector() CTEMP t_kk; copymode = 1; - if (rmass) + if (atomKK->rmass) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk); diff --git a/src/KOKKOS/compute_temp_kokkos.h b/src/KOKKOS/compute_temp_kokkos.h index a01bb42f8c3472d8254cb0ecd0e10bcd68461ceb..7f116f712038a450c1e7c5bad698b266aa824e4d 100644 --- a/src/KOKKOS/compute_temp_kokkos.h +++ b/src/KOKKOS/compute_temp_kokkos.h @@ -84,7 +84,7 @@ class ComputeTempKokkos : public ComputeTemp { protected: typename ArrayTypes<DeviceType>::t_v_array_randomread v; - double *rmass; + typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass; typename ArrayTypes<DeviceType>::t_float_1d_randomread mass; typename ArrayTypes<DeviceType>::t_int_1d_randomread type; typename ArrayTypes<DeviceType>::t_int_1d_randomread mask; diff --git a/src/KOKKOS/fix_freeze_kokkos.cpp b/src/KOKKOS/fix_freeze_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b87ec86f8f5c28ab4afe97a0c2e4d2cb9f0d1507 --- /dev/null +++ b/src/KOKKOS/fix_freeze_kokkos.cpp @@ -0,0 +1,124 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_freeze_kokkos.h" +#include "atom_masks.h" +#include "atom_kokkos.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +FixFreezeKokkos<DeviceType>::FixFreezeKokkos(LAMMPS *lmp, int narg, char **arg) : + FixFreeze(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + + datamask_read = F_MASK | MASK_MASK; + datamask_modify = F_MASK | TORQUE_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +int FixFreezeKokkos<DeviceType>::setmask() +{ + return FixFreeze::setmask(); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixFreezeKokkos<DeviceType>::init() +{ + FixFreeze::init(); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixFreezeKokkos<DeviceType>::setup(int vflag) +{ + FixFreeze::setup(vflag); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixFreezeKokkos<DeviceType>::post_force(int vflag) +{ + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + f = atomKK->k_f.view<DeviceType>(); + torque = atomKK->k_torque.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + force_flag = 0; + copymode = 1; + OriginalForce original; + Kokkos::parallel_reduce(nlocal, *this, original); + copymode = 0; + + foriginal[0] = original.values[0]; + foriginal[1] = original.values[1]; + foriginal[2] = original.values[2]; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixFreezeKokkos<DeviceType>::post_force_respa(int vflag, int ilevel, int iloop) +{ + post_force(vflag); +} + +/* ---------------------------------------------------------------------- + return components of total force on fix group before force was changed +------------------------------------------------------------------------- */ + +template<class DeviceType> +double FixFreezeKokkos<DeviceType>::compute_vector(int n) +{ + return FixFreeze::compute_vector(n); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixFreezeKokkos<DeviceType>::operator()(const int i, OriginalForce &original) const { + if (mask[i] & groupbit) { + original.values[0] += f(i,0); + original.values[1] += f(i,1); + original.values[2] += f(i,2); + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + torque(i,0) = 0.0; + torque(i,1) = 0.0; + torque(i,2) = 0.0; + } +} + +namespace LAMMPS_NS { +template class FixFreezeKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixFreezeKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/fix_freeze_kokkos.h b/src/KOKKOS/fix_freeze_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..193e0119958ec44422342a17a9ff8a22855287e0 --- /dev/null +++ b/src/KOKKOS/fix_freeze_kokkos.h @@ -0,0 +1,79 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(freeze/kk,FixFreezeKokkos<LMPDeviceType>) +FixStyle(freeze/kk/device,FixFreezeKokkos<LMPDeviceType>) +FixStyle(freeze/kk/host,FixFreezeKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_FREEZE_KOKKOS_H +#define LMP_FIX_FREEZE_KOKKOS_H + +#include "fix_freeze.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class FixFreezeKokkos : public FixFreeze { + public: + struct OriginalForce { + double values[3]; + + KOKKOS_INLINE_FUNCTION + OriginalForce() { + values[0] = 0; + values[1] = 0; + values[2] = 0; + } + + KOKKOS_INLINE_FUNCTION + OriginalForce &operator+=(const OriginalForce &rhs) { + values[0] += rhs.values[0]; + values[1] += rhs.values[1]; + values[2] += rhs.values[2]; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile OriginalForce &rhs) volatile { + values[0] += rhs.values[0]; + values[1] += rhs.values[1]; + values[2] += rhs.values[2]; + } + }; + + FixFreezeKokkos(class LAMMPS *, int, char **); + int setmask(); + void init(); + void setup(int); + void post_force(int); + void post_force_respa(int, int, int); + double compute_vector(int); + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, OriginalForce &original) const; + + private: + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_f_array torque; + typename ArrayTypes<DeviceType>::t_int_1d mask; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_FREEZE_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/fix_gravity_kokkos.cpp b/src/KOKKOS/fix_gravity_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2aff7af56cf74ca970ce9c470379282fbcfd8e41 --- /dev/null +++ b/src/KOKKOS/fix_gravity_kokkos.cpp @@ -0,0 +1,121 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_gravity_kokkos.h" +#include "atom_masks.h" +#include "modify.h" +#include "input.h" +#include "variable.h" +#include "update.h" +#include "atom_kokkos.h" +#include "atom_vec.h" + +using namespace LAMMPS_NS; + +enum{CONSTANT,EQUAL}; + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +FixGravityKokkos<DeviceType>::FixGravityKokkos(LAMMPS *lmp, int narg, char **arg) : + FixGravity(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + + datamask_read = X_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; + datamask_modify = F_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixGravityKokkos<DeviceType>::post_force(int vflag) +{ + // update gravity due to variables + + if (varflag != CONSTANT) { + modify->clearstep_compute(); + if (mstyle == EQUAL) magnitude = input->variable->compute_equal(mvar); + if (vstyle == EQUAL) vert = input->variable->compute_equal(vvar); + if (pstyle == EQUAL) phi = input->variable->compute_equal(pvar); + if (tstyle == EQUAL) theta = input->variable->compute_equal(tvar); + if (xstyle == EQUAL) xdir = input->variable->compute_equal(xvar); + if (ystyle == EQUAL) ydir = input->variable->compute_equal(yvar); + if (zstyle == EQUAL) zdir = input->variable->compute_equal(zvar); + modify->addstep_compute(update->ntimestep + 1); + + set_acceleration(); + } + + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + if (atomKK->rmass) + rmass = atomKK->k_rmass.view<DeviceType>(); + else + mass = atomKK->k_mass.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + copymode = 1; + + eflag = 0; + egrav = 0.0; + + if (atomKK->rmass) { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixGravityRMass>(0,nlocal), *this, egrav); + } + else { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixGravityMass>(0,nlocal), *this, egrav); + } + copymode = 0; +} + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixGravityKokkos<DeviceType>::operator()(TagFixGravityRMass, const int i, double &eg) const +{ + if (mask[i] & groupbit) { + double massone = rmass[i]; + f(i,0) += massone*xacc; + f(i,1) += massone*yacc; + f(i,2) += massone*zacc; + eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2)); + } +} + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixGravityKokkos<DeviceType>::operator()(TagFixGravityMass, const int i, double &eg) const +{ + if (mask[i] & groupbit) { + double massone = mass[type[i]]; + f(i,0) += massone*xacc; + f(i,1) += massone*yacc; + f(i,2) += massone*zacc; + eg -= massone * (xacc*x(i,0) + yacc*x(i,1) + zacc*x(i,2)); + } +} + +namespace LAMMPS_NS { +template class FixGravityKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixGravityKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/fix_gravity_kokkos.h b/src/KOKKOS/fix_gravity_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..cc487a0e2dd317f4ee69f344ecc26bdb0eb423e5 --- /dev/null +++ b/src/KOKKOS/fix_gravity_kokkos.h @@ -0,0 +1,57 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(gravity/kk,FixGravityKokkos<LMPDeviceType>) +FixStyle(gravity/kk/device,FixGravityKokkos<LMPDeviceType>) +FixStyle(gravity/kk/host,FixGravityKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_GRAVITY_KOKKOS_H +#define LMP_FIX_GRAVITY_KOKKOS_H + +#include "fix_gravity.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +struct TagFixGravityRMass {}; +struct TagFixGravityMass {}; + +template<class DeviceType> +class FixGravityKokkos : public FixGravity { + public: + FixGravityKokkos(class LAMMPS *, int, char **); + virtual ~FixGravityKokkos() {} + void post_force(int); + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixGravityRMass, const int, double &) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixGravityMass, const int, double &) const; + + private: + typename ArrayTypes<DeviceType>::t_x_array x; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_float_1d_randomread rmass; + typename ArrayTypes<DeviceType>::t_float_1d_randomread mass; + typename ArrayTypes<DeviceType>::t_int_1d type; + typename ArrayTypes<DeviceType>::t_int_1d mask; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_GRAVITY_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 24d505e08d46854b3b72660436070ef1f9688a78..8c403d3325e515c1ba094162af8ad1539d3da04e 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -133,7 +133,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) { // sync the device views which might have been modified on host atomKK->sync(execution_space,datamask_read); - rmass = atomKK->rmass; + rmass = atomKK->k_rmass.view<DeviceType>(); f = atomKK->k_f.template view<DeviceType>(); v = atomKK->k_v.template view<DeviceType>(); type = atomKK->k_type.template view<DeviceType>(); @@ -191,7 +191,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) if (gjfflag) if (tallyflag) if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -210,7 +210,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -230,7 +230,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) } else if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -249,7 +249,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -270,7 +270,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) else if (tallyflag) if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -289,7 +289,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -309,7 +309,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) } else if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -328,7 +328,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -350,7 +350,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) if (gjfflag) if (tallyflag) if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -369,7 +369,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -389,7 +389,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) } else if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -408,7 +408,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -429,7 +429,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) else if (tallyflag) if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -448,7 +448,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -468,7 +468,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) } else if (tbiasflag == BIAS) - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); @@ -487,7 +487,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } else - if (rmass) + if (rmass.data()) if (zeroflag) { FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this); Kokkos::parallel_reduce(nlocal,post_functor,s_fsum); diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h index a9d015f6a1a669c76472a8dc300b509ade85b9e7..140fea81d69456c4d1b21d40a1b27c24c257e3fd 100644 --- a/src/KOKKOS/fix_langevin_kokkos.h +++ b/src/KOKKOS/fix_langevin_kokkos.h @@ -93,7 +93,7 @@ namespace LAMMPS_NS { private: class CommKokkos *commKK; - double *rmass; + typename ArrayTypes<DeviceType>::t_float_1d rmass; typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev; typename ArrayTypes<DeviceType>::t_double_2d d_franprev; HAT::t_double_2d h_franprev; diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d481c20818f1362ae0894b1fb00d2b014699d5cb --- /dev/null +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -0,0 +1,335 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_neigh_history_kokkos.h" +#include "atom_kokkos.h" +#include "error.h" +#include "memory_kokkos.h" +#include "neigh_list_kokkos.h" +#include "pair_kokkos.h" +#include "comm.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNeighHistory(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + + memory->destroy(npartner); + memory->sfree(partner); + memory->sfree(valuepartner); + npartner = NULL; + partner = NULL; + valuepartner = NULL; + + maxpartner = 8; + grow_arrays(atom->nmax); + + d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(d_resize); +#else + h_resize = d_resize; +#endif + h_resize() = 1; +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_npartner, npartner); + memoryKK->destroy_kokkos(k_partner, partner); + memoryKK->destroy_kokkos(k_valuepartner, valuepartner); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::init() +{ + if (atomKK->tag_enable == 0) + error->all(FLERR,"Neighbor history requires atoms have IDs"); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::pre_exchange() +{ + copymode = 1; + + h_resize() = 1; + while (h_resize() > 0) { + FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this); + Kokkos::parallel_for(nlocal_neigh,zero); + + h_resize() = 0; + deep_copy(d_resize, h_resize); + + FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this); + Kokkos::parallel_for(nlocal_neigh,f); + + deep_copy(h_resize, d_resize); + if (h_resize() > 0) { + maxpartner += 8; + memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner"); + memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner"); + } + } + + copymode = 0; + + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const +{ + d_npartner[i] = 0; +} + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const +{ + const int i = d_ilist[ii]; + const int jnum = d_numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + if (d_firstflag(i,jj)) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + int m = Kokkos::atomic_fetch_add(&d_npartner[i],1); + if (m < maxpartner) { + d_partner(i,m) = tag[j]; + for (int k = 0; k < dnum; k++) + d_valuepartner(i,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + } else { + d_resize() = 1; + } + if (j < nlocal_neigh) { + m = Kokkos::atomic_fetch_add(&d_npartner[j],1); + if (m < maxpartner) { + d_partner(j,m) = tag[i]; + for (int k = 0; k < dnum; k++) + d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + } else { + d_resize() = 1; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor() +{ + post_neighbor(); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::post_neighbor() +{ + tag = atomKK->k_tag.view<DeviceType>(); + + int inum = pair->list->inum; + NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + // store atom counts used for new neighbor list which was just built + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + nlocal_neigh = nlocal; + nall_neigh = nall; + + // realloc firstflag and firstvalue if needed + + if (maxatom < nlocal || k_list->maxneighs > d_firstflag.extent(1)) { + maxatom = nall; + d_firstflag = Kokkos::View<int**>("neighbor_history:firstflag",maxatom,k_list->maxneighs); + d_firstvalue = Kokkos::View<LMP_FLOAT**>("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum); + } + + copymode = 1; + + FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this); + Kokkos::parallel_for(inum,f); + + copymode = 0; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const +{ + const int i = d_ilist[ii]; + const int jnum = d_numneigh[i]; + const int np = d_npartner[i]; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + const int rflag = j >> SBBITS & 3; + j &= NEIGHMASK; + + int m; + if (rflag) { + int jtag = tag(j); + for (m = 0; m < np; m++) + if (d_partner(i, m) == jtag) break; + if (m < np) { + d_firstflag(i,jj) = 1; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k); + } + } else { + d_firstflag(i,jj) = 0; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = 0; + } + } + } else { + d_firstflag(i,jj) = 0; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = 0; + } + } + } +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +double FixNeighHistoryKokkos<DeviceType>::memory_usage() +{ + double bytes = d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int); + bytes += d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double); + bytes += 2*k_npartner.extent(0)*sizeof(int); + bytes += 2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int); + bytes += 2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate fictitious charge arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax) +{ + k_npartner.template sync<LMPHostType>(); // force reallocation on host + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner"); + memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner"); + memoryKK->grow_kokkos(k_valuepartner,valuepartner,nmax,dnum*maxpartner,"neighbor_history:valuepartner"); + + d_npartner = k_npartner.template view<DeviceType>(); + d_partner = k_partner.template view<DeviceType>(); + d_valuepartner = k_valuepartner.template view<DeviceType>(); + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- + copy values within fictitious charge arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int delflag) +{ + k_npartner.template sync<LMPHostType>(); + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + npartner[j] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) { + partner[j][m] = partner[i][m]; + valuepartner[j][m] = valuepartner[i][m]; + } + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange with another proc +------------------------------------------------------------------------- */ + +template<class DeviceType> +int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf) +{ + k_npartner.template sync<LMPHostType>(); + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + int n = 0; + buf[n++] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m]; + for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m]; + + return n; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange with another proc +------------------------------------------------------------------------- */ + +template<class DeviceType> +int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + npartner[nlocal] = static_cast<int>(buf[n++]); + for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]); + for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++]; + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); + + return n; +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixNeighHistoryKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixNeighHistoryKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..d5bb1c3971d5dbea19cca627e0314ae1c4b5cb35 --- /dev/null +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -0,0 +1,107 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(NEIGH_HISTORY/KK,FixNeighHistoryKokkos<LMPDeviceType>) +FixStyle(NEIGH_HISTORY/KK/DEVICE,FixNeighHistoryKokkos<LMPDeviceType>) +FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_NEIGH_HISTORY_KOKKOS_H +#define LMP_FIX_NEIGH_HISTORY_KOKKOS_H + +#include "fix_neigh_history.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { +template <class DeviceType> +class FixNeighHistoryKokkos : public FixNeighHistory { + public: + FixNeighHistoryKokkos(class LAMMPS *, int, char **); + ~FixNeighHistoryKokkos(); + + void init(); + void pre_exchange(); + void setup_post_neighbor(); + virtual void post_neighbor(); + double memory_usage(); + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + + KOKKOS_INLINE_FUNCTION + void zero_partner_count_item(const int &i) const; + KOKKOS_INLINE_FUNCTION + void pre_exchange_item(const int &ii) const; + KOKKOS_INLINE_FUNCTION + void post_neighbor_item(const int &ii) const; + + typename Kokkos::View<int**> d_firstflag; + typename Kokkos::View<LMP_FLOAT**> d_firstvalue; + + private: + typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner; + typename ArrayTypes<DeviceType>::tdual_tagint_2d k_partner; + typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner; + + // for neighbor list lookup + typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors; + typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist; + typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh; + + typename ArrayTypes<DeviceType>::t_tagint_1d tag; + typename ArrayTypes<DeviceType>::t_int_1d d_npartner; + typename ArrayTypes<DeviceType>::t_tagint_2d d_partner; + typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner; + + typename ArrayTypes<DeviceType>::t_int_scalar d_resize; + typename ArrayTypes<LMPHostType>::t_int_scalar h_resize; +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosZeroPartnerCountFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + c.zero_partner_count_item(i); + } +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosPreExchangeFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + c.pre_exchange_item(i); + } +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosPostNeighborFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + c.post_neighbor_item(i); + } +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_NEIGH_HISTORY_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index 347177b7f454bc03abfd739e8d51f218065b24de..42d421e92ef85bfc1539c808e61d55f508283b9f 100644 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -537,7 +537,7 @@ void FixNHKokkos<DeviceType>::nve_v() v = atomKK->k_v.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>(); - rmass = atomKK->rmass; + rmass = atomKK->k_rmass.view<DeviceType>(); mass = atomKK->k_mass.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mask = atomKK->k_mask.view<DeviceType>(); @@ -545,7 +545,7 @@ void FixNHKokkos<DeviceType>::nve_v() if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; copymode = 1; - if (rmass) + if (rmass.data()) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this); diff --git a/src/KOKKOS/fix_nh_kokkos.h b/src/KOKKOS/fix_nh_kokkos.h index a625cdbc204c82c10e209f537115198b6b811d73..ae18bd6dbdfd075325055a3dd0b72057ede45ac4 100644 --- a/src/KOKKOS/fix_nh_kokkos.h +++ b/src/KOKKOS/fix_nh_kokkos.h @@ -71,8 +71,8 @@ class FixNHKokkos : public FixNH { typename ArrayTypes<DeviceType>::t_x_array x; typename ArrayTypes<DeviceType>::t_v_array v; typename ArrayTypes<DeviceType>::t_f_array_const f; - double *rmass; - typename ArrayTypes<DeviceType>::t_float_1d_randomread mass; + typename ArrayTypes<DeviceType>::t_float_1d rmass; + typename ArrayTypes<DeviceType>::t_float_1d mass; typename ArrayTypes<DeviceType>::t_int_1d type; typename ArrayTypes<DeviceType>::t_int_1d mask; }; diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index cb3eadcd9009f8e7a4426dae6a2bc0c459592111..621f16f4201c76a25162ec325e0ddae1d705f916 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -62,14 +62,14 @@ void FixNVEKokkos<DeviceType>::initial_integrate(int vflag) x = atomKK->k_x.view<DeviceType>(); v = atomKK->k_v.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>(); - rmass = atomKK->rmass; + rmass = atomKK->k_rmass.view<DeviceType>(); mass = atomKK->k_mass.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mask = atomKK->k_mask.view<DeviceType>(); int nlocal = atomKK->nlocal; if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - if (rmass) { + if (rmass.data()) { FixNVEKokkosInitialIntegrateFunctor<DeviceType,1> functor(this); Kokkos::parallel_for(nlocal,functor); } else { @@ -118,14 +118,14 @@ void FixNVEKokkos<DeviceType>::final_integrate() v = atomKK->k_v.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>(); - rmass = atomKK->rmass; + rmass = atomKK->k_rmass.view<DeviceType>(); mass = atomKK->k_mass.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mask = atomKK->k_mask.view<DeviceType>(); int nlocal = atomKK->nlocal; if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - if (rmass) { + if (rmass.data()) { FixNVEKokkosFinalIntegrateFunctor<DeviceType,1> functor(this); Kokkos::parallel_for(nlocal,functor); } else { diff --git a/src/KOKKOS/fix_nve_kokkos.h b/src/KOKKOS/fix_nve_kokkos.h index 578e0f46cc932efc4b6434b97612b64a64ef8533..e2230d86b622cabd5ff063987688ae3891d71410 100644 --- a/src/KOKKOS/fix_nve_kokkos.h +++ b/src/KOKKOS/fix_nve_kokkos.h @@ -60,8 +60,8 @@ class FixNVEKokkos : public FixNVE { typename ArrayTypes<DeviceType>::t_x_array x; typename ArrayTypes<DeviceType>::t_v_array v; typename ArrayTypes<DeviceType>::t_f_array_const f; - double *rmass; - typename ArrayTypes<DeviceType>::t_float_1d_randomread mass; + typename ArrayTypes<DeviceType>::t_float_1d rmass; + typename ArrayTypes<DeviceType>::t_float_1d mass; typename ArrayTypes<DeviceType>::t_int_1d type; typename ArrayTypes<DeviceType>::t_int_1d mask; }; diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.cpp b/src/KOKKOS/fix_nve_sphere_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d636f56b20747656a50b6f12ec529d4e37724ef0 --- /dev/null +++ b/src/KOKKOS/fix_nve_sphere_kokkos.cpp @@ -0,0 +1,155 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_nve_sphere_kokkos.h" +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "error.h" + +using namespace LAMMPS_NS; + +enum{NONE,DIPOLE}; + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +FixNVESphereKokkos<DeviceType>::FixNVESphereKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNVESphere(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + + datamask_read = F_MASK | TORQUE_MASK | RMASS_MASK | RADIUS_MASK | MASK_MASK; + datamask_modify = X_MASK | V_MASK | OMEGA_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNVESphereKokkos<DeviceType>::cleanup_copy() +{ + id = style = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNVESphereKokkos<DeviceType>::init() +{ + FixNVESphere::init(); + + if (extra == DIPOLE) { + error->all(FLERR,"Fix nve/sphere/kk doesn't yet support dipole"); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNVESphereKokkos<DeviceType>::initial_integrate(int vflag) +{ + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view<DeviceType>(); + v = atomKK->k_v.view<DeviceType>(); + omega = atomKK->k_omega.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + torque = atomKK->k_torque.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + rmass = atomKK->k_rmass.view<DeviceType>(); + radius = atomKK->k_radius.view<DeviceType>(); + + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + FixNVESphereKokkosInitialIntegrateFunctor<DeviceType> f(this); + Kokkos::parallel_for(nlocal,f); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixNVESphereKokkos<DeviceType>::initial_integrate_item(const int i) const +{ + const double dtfrotate = dtf / inertia; + + if (mask(i) & groupbit) { + const double dtfm = dtf / rmass(i); + v(i,0) += dtfm * f(i,0); + v(i,1) += dtfm * f(i,1); + v(i,2) += dtfm * f(i,2); + x(i,0) += dtv * v(i,0); + x(i,1) += dtv * v(i,1); + x(i,2) += dtv * v(i,2); + + const double dtirotate = dtfrotate / (radius(i)*radius(i)*rmass(i)); + omega(i,0) += dtirotate * torque(i,0); + omega(i,1) += dtirotate * torque(i,1); + omega(i,2) += dtirotate * torque(i,2); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNVESphereKokkos<DeviceType>::final_integrate() +{ + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + v = atomKK->k_v.view<DeviceType>(); + omega = atomKK->k_omega.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + torque = atomKK->k_torque.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + rmass = atomKK->k_rmass.view<DeviceType>(); + radius = atomKK->k_radius.view<DeviceType>(); + + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + FixNVESphereKokkosFinalIntegrateFunctor<DeviceType> f(this); + Kokkos::parallel_for(nlocal,f); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixNVESphereKokkos<DeviceType>::final_integrate_item(const int i) const +{ + const double dtfrotate = dtf / inertia; + + if (mask(i) & groupbit) { + const double dtfm = dtf / rmass(i); + v(i,0) += dtfm * f(i,0); + v(i,1) += dtfm * f(i,1); + v(i,2) += dtfm * f(i,2); + + const double dtirotate = dtfrotate / (radius(i)*radius(i)*rmass(i)); + omega(i,0) += dtirotate * torque(i,0); + omega(i,1) += dtirotate * torque(i,1); + omega(i,2) += dtirotate * torque(i,2); + } +} + +namespace LAMMPS_NS { +template class FixNVESphereKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixNVESphereKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.h b/src/KOKKOS/fix_nve_sphere_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..382e530319f85ec52945dc90ea25bf47fe7f1b7f --- /dev/null +++ b/src/KOKKOS/fix_nve_sphere_kokkos.h @@ -0,0 +1,79 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(nve/sphere/kk,FixNVESphereKokkos<LMPDeviceType>) +FixStyle(nve/sphere/kk/device,FixNVESphereKokkos<LMPDeviceType>) +FixStyle(nve/sphere/kk/host,FixNVESphereKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_NVE_SPHERE_KOKKOS_H +#define LMP_FIX_NVE_SPHERE_KOKKOS_H + +#include "fix_nve_sphere.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class FixNVESphereKokkos : public FixNVESphere { + public: + FixNVESphereKokkos(class LAMMPS *, int, char **); + virtual ~FixNVESphereKokkos() {} + void cleanup_copy(); + void init(); + void initial_integrate(int); + void final_integrate(); + + KOKKOS_INLINE_FUNCTION + void initial_integrate_item(const int i) const; + KOKKOS_INLINE_FUNCTION + void final_integrate_item(const int i) const; + + private: + typename ArrayTypes<DeviceType>::t_x_array x; + typename ArrayTypes<DeviceType>::t_v_array v; + typename ArrayTypes<DeviceType>::t_v_array omega; + typename ArrayTypes<DeviceType>::t_f_array f; + typename ArrayTypes<DeviceType>::t_f_array torque; + typename ArrayTypes<DeviceType>::t_float_1d rmass; + typename ArrayTypes<DeviceType>::t_float_1d radius; + typename ArrayTypes<DeviceType>::t_int_1d mask; +}; + +template <class DeviceType> +struct FixNVESphereKokkosInitialIntegrateFunctor { + FixNVESphereKokkos<DeviceType> c; + FixNVESphereKokkosInitialIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); } + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + c.initial_integrate_item(i); + } +}; + +template <class DeviceType> +struct FixNVESphereKokkosFinalIntegrateFunctor { + FixNVESphereKokkos<DeviceType> c; + FixNVESphereKokkosFinalIntegrateFunctor(FixNVESphereKokkos<DeviceType> *c_ptr): c(*c_ptr) { c.cleanup_copy(); } + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + c.final_integrate_item(i); + } +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_NVE_SPHERE_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4411627a784f01329cb35da5afbb9457597d45e1..6f75e6cabcb05dea636941733b240c5cf9203d1e 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -24,8 +24,8 @@ namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { } @@ -33,8 +33,8 @@ NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(l copy needed info from Neighbor class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_neighbor_info() { NPair::copy_neighbor_info(); @@ -63,8 +63,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info() copy per-atom and per-bin vectors from NBin class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_bin_info() { NPair::copy_bin_info(); @@ -80,8 +80,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info() copy needed info from NStencil class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_stencil_info() { NPair::copy_stencil_info(); @@ -110,8 +110,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info() /* ---------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_) { NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; @@ -131,6 +131,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_stencilxyz.view<DeviceType>(), nlocal, atomKK->k_x.view<DeviceType>(), + atomKK->k_radius.view<DeviceType>(), atomKK->k_type.view<DeviceType>(), atomKK->k_mask.view<DeviceType>(), atomKK->k_molecule.view<DeviceType>(), @@ -155,7 +156,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_ex_mol_intra.view<DeviceType>(), bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, - domain->xprd_half,domain->yprd_half,domain->zprd_half); + domain->xprd_half,domain->yprd_half,domain->zprd_half, + skin); k_cutneighsq.sync<DeviceType>(); k_ex1_type.sync<DeviceType>(); @@ -171,7 +173,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_bincount.sync<DeviceType>(); k_bins.sync<DeviceType>(); k_atom2bin.sync<DeviceType>(); - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; @@ -198,25 +200,49 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) Kokkos::parallel_for(nall, f); } else { if (newton_pair) { - NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + if (SIZE) { + NPairKokkosBuildFunctorSize<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - if (ExecutionSpaceFromDevice<DeviceType>::space == Device) - Kokkos::parallel_for(config, f); - else - Kokkos::parallel_for(nall, f); + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif + } else { + NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } } else { - NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + if (SIZE) { + NPairKokkosBuildFunctorSize<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - if (ExecutionSpaceFromDevice<DeviceType>::space == Device) - Kokkos::parallel_for(config, f); - else - Kokkos::parallel_for(nall, f); + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif + } else { + NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } } } deep_copy(data.h_resize, data.resize); @@ -774,19 +800,306 @@ void NeighborKokkosExecute<DeviceType>:: neigh_list.d_ilist(i) = i; } +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> template<int HalfNeigh,int Newton,int Tri> +void NeighborKokkosExecute<DeviceType>:: + build_ItemSize(const int &i) const +{ + /* if necessary, goto next page and add pages */ + int n = 0; + + // get subview of neighbors of i + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const X_FLOAT radi = radius(i); + const int itype = type(i); + + const int ibin = c_atom2bin(i); + + const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil + = d_stencil; + + const int mask_history = 3 << SBBITS; + + // loop over all bins in neighborhood (includes ibin) + if(HalfNeigh) + for(int m = 0; m < c_bincount(ibin); m++) { + const int j = c_bins(ibin,m); + const int jtype = type(j); + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists + if((j == i) || (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) && + ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + radius(j); + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + + for(int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + + // get subview of jbin + if(HalfNeigh && (ibin==jbin)) continue; + //const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL); + for(int m = 0; m < c_bincount(jbin); m++) { + + const int j = c_bins(jbin,m); + const int jtype = type(j); + + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + radius(j); + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + + neigh_list.d_numneigh(i) = n; + + if(n > neigh_list.maxneighs) { + resize() = 1; + + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } + + neigh_list.d_ilist(i) = i; +} + +/* ---------------------------------------------------------------------- */ + +#ifdef KOKKOS_HAVE_CUDA +template<class DeviceType> template<int HalfNeigh,int Newton,int Tri> +__device__ inline +void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const +{ + /* loop over atoms in i's bin, + */ + const int atoms_per_bin = c_bins.extent(1); + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); + const int MY_BIN = dev.team_rank()/atoms_per_bin; + + const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; + + if(ibin >=c_bincount.extent(0)) return; + X_FLOAT* other_x = sharedmem; + other_x = other_x + 6*atoms_per_bin*MY_BIN; + + int* other_id = (int*) &other_x[5 * atoms_per_bin]; + + int bincount_current = c_bincount[ibin]; + + for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { + const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); + const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; + /* if necessary, goto next page and add pages */ + + int n = 0; + + X_FLOAT xtmp; + X_FLOAT ytmp; + X_FLOAT ztmp; + X_FLOAT radi; + int itype; + const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0); + const int mask_history = 3 << SBBITS; + + if(i >= 0) { + xtmp = x(i, 0); + ytmp = x(i, 1); + ztmp = x(i, 2); + radi = radius(i); + itype = type(i); + other_x[MY_II] = xtmp; + other_x[MY_II + atoms_per_bin] = ytmp; + other_x[MY_II + 2 * atoms_per_bin] = ztmp; + other_x[MY_II + 3 * atoms_per_bin] = itype; + other_x[MY_II + 4 * atoms_per_bin] = radi; + } + other_id[MY_II] = i; + int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); + + if(test) return; + + if(i >= 0 && i < nlocal) { + #pragma unroll 4 + for(int m = 0; m < bincount_current; m++) { + int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists + if((j == i) || + (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && + ((j < i) || + ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + __syncthreads(); + + const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil + = d_stencil; + for(int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + + if(ibin == jbin) continue; + + bincount_current = c_bincount[jbin]; + int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; + + if(j >= 0) { + other_x[MY_II] = x(j, 0); + other_x[MY_II + atoms_per_bin] = x(j, 1); + other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); + other_x[MY_II + 3 * atoms_per_bin] = type(j); + other_x[MY_II + 4 * atoms_per_bin] = radius(j); + } + + other_id[MY_II] = j; + + __syncthreads(); + + if(i >= 0 && i < nlocal) { + #pragma unroll 8 + for(int m = 0; m < bincount_current; m++) { + const int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + if(HalfNeigh && (j < i)) continue; + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + __syncthreads(); + } + + if(i >= 0 && i < nlocal) { + neigh_list.d_numneigh(i) = n; + neigh_list.d_ilist(i) = i; + } + + if(n > neigh_list.maxneighs) { + resize() = 1; + + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } + } +} +#endif + } namespace LAMMPS_NS { -template class NPairKokkos<LMPDeviceType,0,0,0>; -template class NPairKokkos<LMPDeviceType,0,1,0>; -template class NPairKokkos<LMPDeviceType,1,0,0>; -template class NPairKokkos<LMPDeviceType,1,1,0>; -template class NPairKokkos<LMPDeviceType,1,0,1>; +template class NPairKokkos<LMPDeviceType,0,0,0,0>; +template class NPairKokkos<LMPDeviceType,0,1,0,0>; +template class NPairKokkos<LMPDeviceType,1,0,0,0>; +template class NPairKokkos<LMPDeviceType,1,1,0,0>; +template class NPairKokkos<LMPDeviceType,1,0,1,0>; +template class NPairKokkos<LMPDeviceType,1,0,0,1>; +template class NPairKokkos<LMPDeviceType,1,0,1,1>; #ifdef KOKKOS_HAVE_CUDA -template class NPairKokkos<LMPHostType,0,0,0>; -template class NPairKokkos<LMPHostType,0,1,0>; -template class NPairKokkos<LMPHostType,1,0,0>; -template class NPairKokkos<LMPHostType,1,1,0>; -template class NPairKokkos<LMPHostType,1,0,1>; +template class NPairKokkos<LMPHostType,0,0,0,0>; +template class NPairKokkos<LMPHostType,0,1,0,0>; +template class NPairKokkos<LMPHostType,1,0,0,0>; +template class NPairKokkos<LMPHostType,1,1,0,0>; +template class NPairKokkos<LMPHostType,1,0,1,0>; +template class NPairKokkos<LMPHostType,1,0,0,1>; +template class NPairKokkos<LMPHostType,1,0,1,1>; #endif } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 6c1c0e958b49dcd09ff7bb6b32efca67b2d3f6a6..970e40c9fca0f2a54f2c6a342d5ac994b9d70fbc 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -13,56 +13,76 @@ #ifdef NPAIR_CLASS -typedef NPairKokkos<LMPHostType,0,0,0> NPairKokkosFullBinHost; +typedef NPairKokkos<LMPHostType,0,0,0,0> NPairKokkosFullBinHost; NPairStyle(full/bin/kk/host, NPairKokkosFullBinHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,0,0,0> NPairKokkosFullBinDevice; +typedef NPairKokkos<LMPDeviceType,0,0,0,0> NPairKokkosFullBinDevice; NPairStyle(full/bin/kk/device, NPairKokkosFullBinDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPHostType,0,1,0> NPairKokkosFullBinGhostHost; +typedef NPairKokkos<LMPHostType,0,1,0,0> NPairKokkosFullBinGhostHost; NPairStyle(full/bin/ghost/kk/host, NPairKokkosFullBinGhostHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,0,1,0> NPairKokkosFullBinGhostDevice; +typedef NPairKokkos<LMPDeviceType,0,1,0,0> NPairKokkosFullBinGhostDevice; NPairStyle(full/bin/ghost/kk/device, NPairKokkosFullBinGhostDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPHostType,1,0,0> NPairKokkosHalfBinHost; +typedef NPairKokkos<LMPHostType,1,0,0,0> NPairKokkosHalfBinHost; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) -typedef NPairKokkos<LMPDeviceType,1,0,0> NPairKokkosHalfBinDevice; +typedef NPairKokkos<LMPDeviceType,1,0,0,0> NPairKokkosHalfBinDevice; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) -typedef NPairKokkos<LMPHostType,1,0,1> NPairKokkosHalfBinHostTri; +typedef NPairKokkos<LMPHostType,1,0,1,0> NPairKokkosHalfBinHostTri; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHostTri, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI) -typedef NPairKokkos<LMPDeviceType,1,0,1> NPairKokkosHalfBinDeviceTri; +typedef NPairKokkos<LMPDeviceType,1,0,1,0> NPairKokkosHalfBinDeviceTri; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDeviceTri, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI) -typedef NPairKokkos<LMPHostType,1,1,0> NPairKokkosHalfBinGhostHost; +typedef NPairKokkos<LMPHostType,1,1,0,0> NPairKokkosHalfBinGhostHost; NPairStyle(half/bin/ghost/kk/host, NPairKokkosHalfBinGhostHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,1,1,0> NPairKokkosHalfBinGhostDevice; +typedef NPairKokkos<LMPDeviceType,1,1,0,0> NPairKokkosHalfBinGhostDevice; NPairStyle(half/bin/ghost/kk/device, NPairKokkosHalfBinGhostDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) +typedef NPairKokkos<LMPHostType,1,0,0,1> NPairKokkosHalfSizeBinHost; +NPairStyle(half/size/bin/kk/host, + NPairKokkosHalfSizeBinHost, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) + +typedef NPairKokkos<LMPDeviceType,1,0,0,1> NPairKokkosHalfSizeBinDevice; +NPairStyle(half/size/bin/kk/device, + NPairKokkosHalfSizeBinDevice, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) + +typedef NPairKokkos<LMPHostType,1,0,1,1> NPairKokkosHalfSizeBinHostTri; +NPairStyle(half/size/bin/kk/host, + NPairKokkosHalfSizeBinHostTri, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI) + +typedef NPairKokkos<LMPDeviceType,1,0,1,1> NPairKokkosHalfSizeBinDeviceTri; +NPairStyle(half/size/bin/kk/device, + NPairKokkosHalfSizeBinDeviceTri, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI) + #else #ifndef LMP_NPAIR_KOKKOS_H @@ -73,7 +93,7 @@ NPairStyle(half/bin/ghost/kk/device, namespace LAMMPS_NS { -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> class NPairKokkos : public NPair { public: NPairKokkos(class LAMMPS *); @@ -162,6 +182,7 @@ class NeighborKokkosExecute // data from Atom class const typename AT::t_x_array_randomread x; + const typename AT::t_float_1d radius; const typename AT::t_int_1d_const type,mask; const typename AT::t_tagint_1d_const molecule; const typename AT::t_tagint_1d_const tag; @@ -188,6 +209,9 @@ class NeighborKokkosExecute const int xperiodic, yperiodic, zperiodic; const int xprd_half, yprd_half, zprd_half; + // GRANULAR required member variables + const X_FLOAT skin; + NeighborKokkosExecute( const NeighListKokkos<DeviceType> &_neigh_list, const typename AT::t_xfloat_2d_randomread &_cutneighsq, @@ -199,6 +223,7 @@ class NeighborKokkosExecute const typename AT::t_int_1d_3 &_d_stencilxyz, const int _nlocal, const typename AT::t_x_array_randomread &_x, + const typename AT::t_float_1d &_radius, const typename AT::t_int_1d_const &_type, const typename AT::t_int_1d_const &_mask, const typename AT::t_tagint_1d_const &_molecule, @@ -225,13 +250,14 @@ class NeighborKokkosExecute const typename AT::t_int_1d_const & _ex_mol_intra, const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, - const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): + const int & _xprd_half, const int & _yprd_half, const int & _zprd_half, + const X_FLOAT _skin): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), nlocal(_nlocal), - x(_x),type(_type),mask(_mask),molecule(_molecule), + x(_x),radius(_radius),type(_type),mask(_mask),molecule(_molecule), tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), @@ -245,7 +271,8 @@ class NeighborKokkosExecute ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), - xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { + xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half), + skin(_skin) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; @@ -280,10 +307,18 @@ class NeighborKokkosExecute KOKKOS_FUNCTION void build_Item_Ghost(const int &i) const; + template<int HalfNeigh, int Newton, int Tri> + KOKKOS_FUNCTION + void build_ItemSize(const int &i) const; + #ifdef KOKKOS_HAVE_CUDA template<int HalfNeigh, int Newton, int Tri> __device__ inline void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const; + + template<int HalfNeigh, int Newton, int Tri> + __device__ inline + void build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const; #endif KOKKOS_INLINE_FUNCTION @@ -399,6 +434,48 @@ struct NPairKokkosBuildFunctorGhost { } }; +template <class DeviceType, int HALF_NEIGH, int GHOST_NEWTON, int TRI> +struct NPairKokkosBuildFunctorSize { + typedef DeviceType device_type; + + const NeighborKokkosExecute<DeviceType> c; + const size_t sharedsize; + + NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<DeviceType> &_c, + const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i); + } + +#ifdef KOKKOS_HAVE_CUDA + __device__ inline + void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const { + c.template build_ItemSizeCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev); + } + size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; } +#endif +}; + +template <int HALF_NEIGH, int GHOST_NEWTON, int TRI> +struct NPairKokkosBuildFunctorSize<LMPHostType,HALF_NEIGH,GHOST_NEWTON,TRI> { + typedef LMPHostType device_type; + + const NeighborKokkosExecute<LMPHostType> c; + const size_t sharedsize; + + NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<LMPHostType> &_c, + const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i); + } + + void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {} // Should error out +}; + } #endif diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 038d95394ac6947036e44e39c0f684251e5ddd31..3282c9da1e24057e8b8169faf8e525a035cf25f5 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -186,7 +186,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) f = atomKK->k_f.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); mass = atomKK->k_mass.view<DeviceType>(); - rmass = atomKK->rmass; + rmass = atomKK->k_rmass.view<DeviceType>(); dpdTheta = atomKK->k_dpdTheta.view<DeviceType>(); k_cutsq.template sync<DeviceType>(); @@ -564,7 +564,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNo a_f(j,2) -= delz*fpair; } - if (rmass) { + if (rmass.data()) { mass_i = rmass[i]; mass_j = rmass[j]; } else { diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 12ffb668e200a01e51555376c78f235ee0739358..9d316df1524faad52fce1d96c8cadedfe553248d 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -132,7 +132,7 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { typename ArrayTypes<DeviceType>::t_f_array f; typename ArrayTypes<DeviceType>::t_int_1d_randomread type; typename ArrayTypes<DeviceType>::t_float_1d_randomread mass; - double *rmass; + typename ArrayTypes<DeviceType>::t_float_1d rmass; typename AT::t_efloat_1d dpdTheta; typename AT::t_efloat_1d d_duCond,d_duMech; HAT::t_efloat_1d h_duCond,h_duMech; diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd8beed583a4f29e3990a5723bed0e17cb243bca --- /dev/null +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -0,0 +1,587 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "pair_gran_hooke_history_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "memory_kokkos.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "error.h" +#include "modify.h" +#include "fix_neigh_history_kokkos.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairGranHookeHistoryKokkos<DeviceType>::PairGranHookeHistoryKokkos(LAMMPS *lmp) : PairGranHookeHistory(lmp) +{ + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | V_MASK | OMEGA_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | MASK_MASK | ENERGY_MASK | VIRIAL_MASK | RMASS_MASK | RADIUS_MASK; + datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairGranHookeHistoryKokkos<DeviceType>::~PairGranHookeHistoryKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + eatom = NULL; + vatom = NULL; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairGranHookeHistoryKokkos<DeviceType>::init_style() +{ + if (history && fix_history == NULL) { + char dnumstr[16]; + sprintf(dnumstr,"%d",3); + char **fixarg = new char*[4]; + fixarg[0] = (char *) "NEIGH_HISTORY"; + fixarg[1] = (char *) "all"; + if (execution_space == Device) + fixarg[2] = (char *) "NEIGH_HISTORY/KK/DEVICE"; + else + fixarg[2] = (char *) "NEIGH_HISTORY/KK/HOST"; + fixarg[3] = dnumstr; + modify->add_fix(4,fixarg,1); + delete [] fixarg; + fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1]; + fix_history->pair = this; + fix_historyKK = (FixNeighHistoryKokkos<DeviceType> *)fix_history; + } + + PairGranHookeHistory::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with gran/hooke/history/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + copymode = 1; + + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag,0); + else evflag = vflag_fdotr = 0; + + int shearupdate = 1; + if (update->setupflag) shearupdate = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view<DeviceType>(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view<DeviceType>(); + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK | TORQUE_MASK); + + x = atomKK->k_x.view<DeviceType>(); + v = atomKK->k_v.view<DeviceType>(); + omega = atomKK->k_omega.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + torque = atomKK->k_torque.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + tag = atomKK->k_tag.view<DeviceType>(); + rmass = atomKK->k_rmass.view<DeviceType>(); + radius = atomKK->k_radius.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + int inum = list->inum; + NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + if (d_numneigh.extent(0) != d_numneigh_touch.extent(0)) + d_numneigh_touch = typename AT::t_int_1d("pair:numneigh_touch",d_numneigh.extent(0)); + if (d_neighbors.extent(0) != d_neighbors_touch.extent(0) || + d_neighbors.extent(1) != d_neighbors_touch.extent(1)) + d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1)); + + d_firsttouch = fix_historyKK->d_firstflag; + d_firstshear = fix_historyKK->d_firstvalue; + + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this); + + EV_FLOAT ev; + + if (lmp->kokkos->neighflag == HALF) { + if (force->newton_pair) { + if (vflag_atom) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,2,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,2,0>>(0,inum),*this); + } + } else if (vflag_global) { + if (shearupdate) { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,1>>(0,inum),*this, ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,0>>(0,inum),*this, ev); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,0>>(0,inum),*this); + } + } + } else { + if (vflag_atom) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,2,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,2,0>>(0,inum),*this); + } + } else if (vflag_global) { + if (shearupdate) { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,1>>(0,inum),*this, ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,0>>(0,inum),*this, ev); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,0>>(0,inum),*this); + } + } + } + } else { // HALFTHREAD + if (force->newton_pair) { + if (vflag_atom) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,2,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,2,0>>(0,inum),*this); + } + } else if (vflag_global) { + if (shearupdate) { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,1>>(0,inum),*this, ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,0>>(0,inum),*this, ev); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,0>>(0,inum),*this); + } + } + } else { + if (vflag_atom) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,2,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,2,0>>(0,inum),*this); + } + } else if (vflag_global) { + if (shearupdate) { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,1>>(0,inum),*this, ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,0>>(0,inum),*this, ev); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,0>>(0,inum),*this); + } + } + } + } + + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_atom) { + k_vatom.template modify<DeviceType>(); + k_vatom.template sync<LMPHostType>(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const LMP_FLOAT imass = rmass[i]; + const LMP_FLOAT irad = radius[i]; + const int jnum = d_numneigh[i]; + int count = 0; + + for (int jj = 0; jj < jnum; jj++) { + const int j = d_neighbors(i,jj) & NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const LMP_FLOAT jmass = rmass[j]; + const LMP_FLOAT jrad = radius[j]; + const LMP_FLOAT radsum = irad + jrad; + + // check for touching neighbors + + if (rsq >= radsum * radsum) { + d_firsttouch(i,jj) = 0; + d_firstshear(i,3*jj) = 0; + d_firstshear(i,3*jj+1) = 0; + d_firstshear(i,3*jj+2) = 0; + } else { + d_firsttouch(i,jj) = 1; + d_neighbors_touch(i,count++) = jj; + } + } + d_numneigh_touch[i] = count; +} + +template<class DeviceType> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int ii, EV_FLOAT &ev) const { + + // The f and torque arrays are atomic for Half/Thread neighbor style + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f; + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_torque = torque; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const LMP_FLOAT imass = rmass[i]; + const LMP_FLOAT irad = radius[i]; + const int jnum = d_numneigh_touch[i]; + + F_FLOAT fx_i = 0.0; + F_FLOAT fy_i = 0.0; + F_FLOAT fz_i = 0.0; + + F_FLOAT torquex_i = 0.0; + F_FLOAT torquey_i = 0.0; + F_FLOAT torquez_i = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + const int m = d_neighbors_touch(i, jj); + const int j = d_neighbors(i, m) & NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const LMP_FLOAT jmass = rmass[j]; + const LMP_FLOAT jrad = radius[j]; + const LMP_FLOAT radsum = irad + jrad; + + // check for touching neighbors + + const LMP_FLOAT r = sqrt(rsq); + const LMP_FLOAT rinv = 1.0/r; + const LMP_FLOAT rsqinv = 1/rsq; + + // relative translational velocity + + V_FLOAT vr1 = v(i,0) - v(j,0); + V_FLOAT vr2 = v(i,1) - v(j,1); + V_FLOAT vr3 = v(i,2) - v(j,2); + + // normal component + + V_FLOAT vnnr = vr1*delx + vr2*dely + vr3*delz; + V_FLOAT vn1 = delx*vnnr * rsqinv; + V_FLOAT vn2 = dely*vnnr * rsqinv; + V_FLOAT vn3 = delz*vnnr * rsqinv; + + // tangential component + + V_FLOAT vt1 = vr1 - vn1; + V_FLOAT vt2 = vr2 - vn2; + V_FLOAT vt3 = vr3 - vn3; + + // relative rotational velocity + + V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; + V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; + V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; + + LMP_FLOAT meff = imass*jmass / (imass+jmass); + if (mask[i] & freeze_group_bit) meff = jmass; + if (mask[j] & freeze_group_bit) meff = imass; + + F_FLOAT damp = meff*gamman*vnnr*rsqinv; + F_FLOAT ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3); + V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1); + V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2); + V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + X_FLOAT shear1 = d_firstshear(i,3*m); + X_FLOAT shear2 = d_firstshear(i,3*m+1); + X_FLOAT shear3 = d_firstshear(i,3*m+2); + if (SHEARUPDATE) { + shear1 += vtr1*dt; + shear2 += vtr2*dt; + shear3 += vtr3*dt; + } + X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + + shear3*shear3); + + // rotate shear displacements + + X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear1 -= rsht*delx; + shear2 -= rsht*dely; + shear3 -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + F_FLOAT fs1 = - (kt*shear1 + meff*gammat*vtr1); + F_FLOAT fs2 = - (kt*shear2 + meff*gammat*vtr2); + F_FLOAT fs3 = - (kt*shear3 + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + F_FLOAT fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + F_FLOAT fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + if (SHEARUPDATE) { + d_firstshear(i,3*m) = shear1; + d_firstshear(i,3*m+1) = shear2; + d_firstshear(i,3*m+2) = shear3; + } + + // forces & torques + + F_FLOAT fx = delx*ccel + fs1; + F_FLOAT fy = dely*ccel + fs2; + F_FLOAT fz = delz*ccel + fs3; + fx_i += fx; + fy_i += fy; + fz_i += fz; + + F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); + F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); + F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1); + torquex_i -= irad*tor1; + torquey_i -= irad*tor2; + torquez_i -= irad*tor3; + + if (NEWTON_PAIR || j < nlocal) { + a_f(j,0) -= fx; + a_f(j,1) -= fy; + a_f(j,2) -= fz; + a_torque(j,0) -= jrad*tor1; + a_torque(j,1) -= jrad*tor2; + a_torque(j,2) -= jrad*tor3; + } + + if (EVFLAG == 2) + ev_tally_xyz_atom<NEIGHFLAG, NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); + if (EVFLAG == 1) + ev_tally_xyz<NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); + } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_torque(i,0) += torquex_i; + a_torque(i,1) += torquey_i; + a_torque(i,2) += torquez_i; +} + + +template<class DeviceType> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int ii) const { + EV_FLOAT ev; + this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>(), ii, ev); +} + +template<class DeviceType> +template<int NEWTON_PAIR> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::ev_tally_xyz(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const +{ + F_FLOAT v[6]; + + v[0] = delx*fx; + v[1] = dely*fy; + v[2] = delz*fz; + v[3] = delx*fy; + v[4] = delx*fz; + v[5] = dely*fz; + + if (NEWTON_PAIR) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } else { + if (i < nlocal) { + ev.v[0] += 0.5*v[0]; + ev.v[1] += 0.5*v[1]; + ev.v[2] += 0.5*v[2]; + ev.v[3] += 0.5*v[3]; + ev.v[4] += 0.5*v[4]; + ev.v[5] += 0.5*v[5]; + } + if (j < nlocal) { + ev.v[0] += 0.5*v[0]; + ev.v[1] += 0.5*v[1]; + ev.v[2] += 0.5*v[2]; + ev.v[3] += 0.5*v[3]; + ev.v[4] += 0.5*v[4]; + ev.v[5] += 0.5*v[5]; + } + } +} + +template<class DeviceType> +template<int NEIGHFLAG, int NEWTON_PAIR> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::ev_tally_xyz_atom(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const +{ + Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>(); + + F_FLOAT v[6]; + + v[0] = delx*fx; + v[1] = dely*fy; + v[2] = delz*fz; + v[3] = delx*fy; + v[4] = delx*fz; + v[5] = dely*fz; + + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v[0]; + v_vatom(i,1) += 0.5*v[1]; + v_vatom(i,2) += 0.5*v[2]; + v_vatom(i,3) += 0.5*v[3]; + v_vatom(i,4) += 0.5*v[4]; + v_vatom(i,5) += 0.5*v[5]; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v[0]; + v_vatom(j,1) += 0.5*v[1]; + v_vatom(j,2) += 0.5*v[2]; + v_vatom(j,3) += 0.5*v[3]; + v_vatom(j,4) += 0.5*v[4]; + v_vatom(j,5) += 0.5*v[5]; + } +} + +namespace LAMMPS_NS { +template class PairGranHookeHistoryKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairGranHookeHistoryKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..822b9203a46ee5945c01eec2e353ca67be42385b --- /dev/null +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -0,0 +1,150 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/history/kk,PairGranHookeHistoryKokkos<LMPDeviceType>) +PairStyle(gran/hooke/history/kk/device,PairGranHookeHistoryKokkos<LMPDeviceType>) +PairStyle(gran/hooke/history/kk/host,PairGranHookeHistoryKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H +#define LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H + +#include "pair_gran_hooke_history.h" +#include "pair_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template <class DeviceType> +class FixNeighHistoryKokkos; + +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> +struct TagPairGranHookeHistoryCompute {}; + +struct TagPairGranHookeHistoryReduce {}; + +template <class DeviceType> +class PairGranHookeHistoryKokkos : public PairGranHookeHistory { + public: + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typedef EV_FLOAT value_type; + + PairGranHookeHistoryKokkos(class LAMMPS *); + virtual ~PairGranHookeHistoryKokkos(); + virtual void compute(int, int); + void init_style(); + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairGranHookeHistoryReduce, const int ii) const; + + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> + KOKKOS_INLINE_FUNCTION + void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const; + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> + KOKKOS_INLINE_FUNCTION + void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int) const; + + template<int NEWTON_PAIR> + KOKKOS_INLINE_FUNCTION + void ev_tally_xyz(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; + template<int NEIGHFLAG, int NEWTON_PAIR> + KOKKOS_INLINE_FUNCTION + void ev_tally_xyz_atom(EV_FLOAT &ev, int i, int j, + F_FLOAT fx, F_FLOAT fy, F_FLOAT fz, + X_FLOAT delx, X_FLOAT dely, X_FLOAT delz) const; + + protected: + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_v_array_randomread v; + typename AT::t_v_array_randomread omega; + typename AT::t_f_array f; + typename AT::t_f_array torque; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread mask; + typename AT::t_float_1d_randomread rmass; + typename AT::t_float_1d_randomread radius; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + typename AT::t_tagint_1d tag; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + typename Kokkos::View<int**> d_firsttouch; + typename Kokkos::View<LMP_FLOAT**> d_firstshear; + + typename AT::t_neighbors_2d d_neighbors_touch; + typename AT::t_int_1d d_numneigh_touch; + + int newton_pair; + double special_lj[4]; + + int neighflag; + int nlocal,nall,eflag,vflag; + + FixNeighHistoryKokkos<DeviceType> *fix_historyKK; + + friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair granular requires atom attributes radius, rmass + +The atom style defined does not have these attributes. + +E: Pair granular requires ghost atoms store velocity + +Use the comm_modify vel yes command to enable this. + +E: Could not find pair fix neigh history ID + +UNDOCUMENTED + +U: Pair granular with shear history requires newton pair off + +This is a current restriction of the implementation of pair +granular styles with history. + +U: Could not find pair fix ID + +A fix is created internally by the pair style to store shear +history information. You cannot delete it. + +*/ diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 136e543a2cd708501004d2f94290b21454dabe0a..f56e6015d014018550e636dd1c8dfcb414c7e8e5 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -55,6 +55,20 @@ struct ForceAdder { /* ---------------------------------------------------------------------- */ +template<class View> +struct Zero { + View v; + Zero(const View &v_):v(v_) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + v(i,0) = 0; + v(i,1) = 0; + v(i,2) = 0; + } +}; + +/* ---------------------------------------------------------------------- */ + VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) : Verlet(lmp, narg, arg) { @@ -120,6 +134,7 @@ void VerletKokkos::setup(int flag) atomKK->modified(Host,ALL_MASK); neighbor->build(1); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -175,7 +190,7 @@ void VerletKokkos::setup(int flag) modify->setup(vflag); output->setup(flag); lmp->kokkos->auto_sync = 1; - update->setupflag = 1; + update->setupflag = 0; } /* ---------------------------------------------------------------------- @@ -223,6 +238,7 @@ void VerletKokkos::setup_minimal(int flag) atomKK->modified(Host,ALL_MASK); neighbor->build(1); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -567,32 +583,25 @@ void VerletKokkos::run(int n) void VerletKokkos::force_clear() { - int i; - if (external_force_clear) return; + atomKK->k_f.modified_host() = 0; // ignore host forces/torques since device views + atomKK->k_torque.modified_host() = 0; // will be cleared below + // clear force on all particles // if either newton flag is set, also include ghosts // when using threads always clear all forces. if (neighbor->includegroup == 0) { - int nall; - if (force->newton) nall = atomKK->nlocal + atomKK->nghost; - else nall = atomKK->nlocal; - - size_t nbytes = sizeof(double) * nall; + int nall = atomKK->nlocal; + if (force->newton) nall += atomKK->nghost; - if (nbytes) { - if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) { - memset_kokkos(atomKK->k_f.view<LMPHostType>()); - atomKK->modified(Host,F_MASK); - atomKK->sync(Device,F_MASK); - } else { - memset_kokkos(atomKK->k_f.view<LMPDeviceType>()); - atomKK->modified(Device,F_MASK); - } - if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes); + Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); + if (torqueflag) { + Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } // neighbor includegroup flag is set @@ -600,35 +609,23 @@ void VerletKokkos::force_clear() // if either newton flag is set, also include ghosts } else { - int nall = atomKK->nfirst; - if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) { - memset_kokkos(atomKK->k_f.view<LMPHostType>()); - atomKK->modified(Host,F_MASK); - } else { - memset_kokkos(atomKK->k_f.view<LMPDeviceType>()); - atomKK->modified(Device,F_MASK); - } + Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); + if (torqueflag) { - double **torque = atomKK->torque; - for (i = 0; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } + Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } if (force->newton) { - nall = atomKK->nlocal + atomKK->nghost; + auto range = Kokkos::RangePolicy<LMPDeviceType>(atomKK->nlocal, atomKK->nlocal + atomKK->nghost); + Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); if (torqueflag) { - double **torque = atomKK->torque; - for (i = atomKK->nlocal; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } + Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } - } } } diff --git a/src/USER-VTK/dump_vtk.cpp b/src/USER-VTK/dump_vtk.cpp index 5f2e3d62ec433e4cd77e80852aa1e3f5e88ad913..edbc647a81857c954ac4ed902d98de4063975b84 100644 --- a/src/USER-VTK/dump_vtk.cpp +++ b/src/USER-VTK/dump_vtk.cpp @@ -93,10 +93,10 @@ enum{VTK,VTP,VTU,PVTP,PVTU}; // file formats #define ONEFIELD 32 #define DELTA 1048576 -#if VTK_MAJOR_VERSION == 7 +#if VTK_MAJOR_VERSION >= 7 #define InsertNextTupleValue InsertNextTypedTuple -#elif VTK_MAJOR_VERSION > 7 -#error This code has only been tested with VTK 5, 6, and 7 +#elif VTK_MAJOR_VERSION > 8 +#error This code has only been tested with VTK 5, 6, 7, and 8 #endif /* ---------------------------------------------------------------------- */ diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 4015240af890523eb23ef7acfd1143a72880f87c..e96b0f38e3ac4576eaabab03feef88cc4c831c24 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -611,7 +611,7 @@ void CommBrick::exchange() maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; if (bufextra > bufextra_old) - memory->grow(buf_send,maxsend+bufextra,"comm:buf_send"); + grow_send(maxsend+bufextra,1); // subbox bounds for orthogonal or triclinic diff --git a/src/fix_gravity.cpp b/src/fix_gravity.cpp index c3e73d86c935dfbe497f9285004ea588162d1675..bc10eb280782bf83023b24a1202336e4b3e5ddf8 100644 --- a/src/fix_gravity.cpp +++ b/src/fix_gravity.cpp @@ -141,6 +141,8 @@ FixGravity::FixGravity(LAMMPS *lmp, int narg, char **arg) : FixGravity::~FixGravity() { + if (copymode) return; + delete [] mstr; delete [] vstr; delete [] pstr; diff --git a/src/fix_gravity.h b/src/fix_gravity.h index 35be23a40a12e37d35224f792426f34c6eafc538..1f18ee274be4fe57225112602b44c6033e1f1223 100644 --- a/src/fix_gravity.h +++ b/src/fix_gravity.h @@ -29,7 +29,7 @@ class FixGravity : public Fix { public: FixGravity(class LAMMPS *, int, char **); - ~FixGravity(); + virtual ~FixGravity(); int setmask(); void init(); void setup(int); diff --git a/src/fix_neigh_history.cpp b/src/fix_neigh_history.cpp index 9661409a6a1cbde20beacd3e3f1ff63605273b7b..c21b494aa41d0d515535afe9c64e7ef9c37f4273 100644 --- a/src/fix_neigh_history.cpp +++ b/src/fix_neigh_history.cpp @@ -95,6 +95,8 @@ FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) : FixNeighHistory::~FixNeighHistory() { + if (copymode) return; + // unregister this fix so atom class doesn't invoke it any more atom->delete_callback(id,0);