diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp index cb704627659e5bb7961c840086200118cf0407a9..816c9f3105808886d01e83bf2b46aa31905ab506 100755 --- a/src/KOKKOS/angle_charmm_kokkos.cpp +++ b/src/KOKKOS/angle_charmm_kokkos.cpp @@ -70,21 +70,20 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); - d_eatom = k_eatom.d_view; + if(k_eatom.dimension_0()<maxeatom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); + d_eatom = k_eatom.d_view; + } } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); - d_vatom = k_vatom.d_view; + if(k_vatom.dimension_0()<maxvatom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); + d_vatom = k_vatom.d_view; + } } - atomKK->sync(execution_space,datamask_read); - k_k.template sync<DeviceType>(); - k_theta0.template sync<DeviceType>(); - k_k_ub.template sync<DeviceType>(); - k_r_ub.template sync<DeviceType>(); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -145,9 +144,6 @@ template<int NEWTON_BOND, int EVFLAG> KOKKOS_INLINE_FUNCTION void AngleCharmmKokkos<DeviceType>::operator()(TagAngleCharmmCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const { - // The f array is atomic - Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f; - const int i1 = anglelist(n,0); const int i2 = anglelist(n,1); const int i3 = anglelist(n,2); @@ -227,21 +223,21 @@ void AngleCharmmKokkos<DeviceType>::operator()(TagAngleCharmmCompute<NEWTON_BOND // apply force to each of 3 atoms if
(NEWTON_BOND || i1 < nlocal) { - a_f(i1,0) += f1[0]; - a_f(i1,1) += f1[1]; - a_f(i1,2) += f1[2]; + f(i1,0) += f1[0]; + f(i1,1) += f1[1]; + f(i1,2) += f1[2]; } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) -= f1[0] + f3[0]; - a_f(i2,1) -= f1[1] + f3[1]; - a_f(i2,2) -= f1[2] + f3[2]; + f(i2,0) -= f1[0] + f3[0]; + f(i2,1) -= f1[1] + f3[1]; + f(i2,2) -= f1[2] + f3[2]; } if (NEWTON_BOND || i3 < nlocal) { - a_f(i3,0) += f3[0]; - a_f(i3,1) += f3[1]; - a_f(i3,2) += f3[2]; + f(i3,0) += f3[0]; + f(i3,1) += f3[1]; + f(i3,2) += f3[2]; } if (EVFLAG) ev_tally(ev,i1,i2,i3,eangle,f1,f3, @@ -262,17 +258,6 @@ template<class DeviceType> void AngleCharmmKokkos<DeviceType>::allocate() { AngleCharmm::allocate(); - - int n = atom->nangletypes; - k_k = DAT::tdual_ffloat_1d("AngleCharmm::k",n+1); - k_theta0 = DAT::tdual_ffloat_1d("AngleCharmm::theta0",n+1); - k_k_ub = DAT::tdual_ffloat_1d("AngleCharmm::k_ub",n+1); - k_r_ub = DAT::tdual_ffloat_1d("AngleCharmm::r_ub",n+1); - - d_k = k_k.d_view; - d_theta0 = k_theta0.d_view; - d_k_ub = k_k_ub.d_view; - d_r_ub = k_r_ub.d_view; } /* ---------------------------------------------------------------------- @@ -285,6 +270,16 @@ void AngleCharmmKokkos<DeviceType>::coeff(int narg, char **arg) AngleCharmm::coeff(narg, arg); int n = atom->nangletypes; + Kokkos::DualView<F_FLOAT*,DeviceType> k_k("AngleCharmm::k",n+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_theta0("AngleCharmm::theta0",n+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_k_ub("AngleCharmm::k_ub",n+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_r_ub("AngleCharmm::r_ub",n+1); + + d_k = k_k.d_view; + d_theta0 = k_theta0.d_view; + d_k_ub = k_k_ub.d_view; + d_r_ub = k_r_ub.d_view; + for (int i = 1; i <= n; i++) { k_k.h_view[i] = k[i]; k_theta0.h_view[i] = theta0[i]; @@ -296,6 +291,12 @@ void AngleCharmmKokkos<DeviceType>::coeff(int narg, char **arg) k_theta0.template modify<LMPHostType>(); k_k_ub.template modify<LMPHostType>(); k_r_ub.template modify<LMPHostType>(); + + k_k.template 
sync<DeviceType>(); + k_theta0.template sync<DeviceType>(); + k_k_ub.template sync<DeviceType>(); + k_r_ub.template sync<DeviceType>(); + } /* ---------------------------------------------------------------------- @@ -314,10 +315,6 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in E_FLOAT eanglethird; F_FLOAT v[6]; - // The eatom and vatom arrays are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>(); - if (eflag_either) { if (eflag_global) { if (newton_bond) ev.evdwl += eangle; @@ -332,9 +329,9 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in if (eflag_atom) { eanglethird = THIRD*eangle; - if (newton_bond || i < nlocal) v_eatom[i] += eanglethird; - if (newton_bond || j < nlocal) v_eatom[j] += eanglethird; - if (newton_bond || k < nlocal) v_eatom[k] += eanglethird; + if (newton_bond || i < nlocal) d_eatom[i] += eanglethird; + if (newton_bond || j < nlocal) d_eatom[j] += eanglethird; + if (newton_bond || k < nlocal) d_eatom[k] += eanglethird; } } @@ -385,28 +382,28 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in if (vflag_atom) { if (newton_bond || i < nlocal) { - v_vatom(i,0) += THIRD*v[0]; - v_vatom(i,1) += THIRD*v[1]; - v_vatom(i,2) += THIRD*v[2]; - v_vatom(i,3) += THIRD*v[3]; - v_vatom(i,4) += THIRD*v[4]; - v_vatom(i,5) += THIRD*v[5]; + d_vatom(i,0) += THIRD*v[0]; + d_vatom(i,1) += THIRD*v[1]; + d_vatom(i,2) += THIRD*v[2]; + d_vatom(i,3) += THIRD*v[3]; + d_vatom(i,4) += THIRD*v[4]; + d_vatom(i,5) += THIRD*v[5]; } if (newton_bond || j < nlocal) { - v_vatom(j,0) += THIRD*v[0]; - v_vatom(j,1) += THIRD*v[1]; - v_vatom(j,2) += THIRD*v[2]; - v_vatom(j,3) += 
THIRD*v[3]; - v_vatom(j,4) += THIRD*v[4]; - v_vatom(j,5) += THIRD*v[5]; + d_vatom(j,0) += THIRD*v[0]; + d_vatom(j,1) += THIRD*v[1]; + d_vatom(j,2) += THIRD*v[2]; + d_vatom(j,3) += THIRD*v[3]; + d_vatom(j,4) += THIRD*v[4]; + d_vatom(j,5) += THIRD*v[5]; } if (newton_bond || k < nlocal) { - v_vatom(k,0) += THIRD*v[0]; - v_vatom(k,1) += THIRD*v[1]; - v_vatom(k,2) += THIRD*v[2]; - v_vatom(k,3) += THIRD*v[3]; - v_vatom(k,4) += THIRD*v[4]; - v_vatom(k,5) += THIRD*v[5]; + d_vatom(k,0) += THIRD*v[0]; + d_vatom(k,1) += THIRD*v[1]; + d_vatom(k,2) += THIRD*v[2]; + d_vatom(k,3) += THIRD*v[3]; + d_vatom(k,4) += THIRD*v[4]; + d_vatom(k,5) += THIRD*v[5]; } } @@ -415,7 +412,10 @@ void AngleCharmmKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const in /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class AngleCharmmKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class AngleCharmmKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/angle_charmm_kokkos.h b/src/KOKKOS/angle_charmm_kokkos.h index 3ff623044b501605f499af074e2eafff686405af..18eeda74b58f7b106e5d8769038e4b449f1d40b5 100755 --- a/src/KOKKOS/angle_charmm_kokkos.h +++ b/src/KOKKOS/angle_charmm_kokkos.h @@ -60,27 +60,23 @@ class AngleCharmmKokkos : public AngleCharmm { class NeighborKokkos *neighborKK; - typename ArrayTypes<DeviceType>::t_x_array_randomread x; - typename ArrayTypes<DeviceType>::t_f_array f; - typename ArrayTypes<DeviceType>::t_int_2d anglelist; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread x; + typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f; + typename AT::t_int_2d anglelist; - DAT::tdual_efloat_1d k_eatom; - DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom; + 
Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom; + Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom; + Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom; int nlocal,newton_bond; int eflag,vflag; - DAT::tdual_ffloat_1d k_k; - DAT::tdual_ffloat_1d k_theta0; - DAT::tdual_ffloat_1d k_k_ub; - DAT::tdual_ffloat_1d k_r_ub; - - DAT::t_ffloat_1d d_k; - DAT::t_ffloat_1d d_theta0; - DAT::t_ffloat_1d d_k_ub; - DAT::t_ffloat_1d d_r_ub; + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_theta0; + typename AT::t_ffloat_1d d_k_ub; + typename AT::t_ffloat_1d d_r_ub; virtual void allocate(); }; diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp index c5976f1bff680bc56787b68b06b7248f4ece7fb7..0d3110f9049c61f3f99b8213bfda590107f35f86 100755 --- a/src/KOKKOS/angle_harmonic_kokkos.cpp +++ b/src/KOKKOS/angle_harmonic_kokkos.cpp @@ -72,24 +72,24 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); - d_eatom = k_eatom.d_view; + d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); - d_vatom = k_vatom.d_view; + d_vatom = k_vatom.template view<DeviceType>(); } - atomKK->sync(execution_space,datamask_read); + //atomKK->sync(execution_space,datamask_read); k_k.template sync<DeviceType>(); k_theta0.template sync<DeviceType>(); - if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); - else atomKK->modified(execution_space,F_MASK); + // if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + // else atomKK->modified(execution_space,F_MASK); - x = atomKK->k_x.view<DeviceType>(); - f = atomKK->k_f.view<DeviceType>(); + x = atomKK->k_x.template 
view<DeviceType>(); + f = atomKK->k_f.template view<DeviceType>(); neighborKK->k_anglelist.template sync<DeviceType>(); - anglelist = neighborKK->k_anglelist.view<DeviceType>(); + anglelist = neighborKK->k_anglelist.template view<DeviceType>(); int nanglelist = neighborKK->nanglelist; nlocal = atom->nlocal; newton_bond = force->newton_bond; @@ -113,7 +113,6 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleHarmonicCompute<0,0> >(0,nanglelist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { @@ -242,11 +241,11 @@ void AngleHarmonicKokkos<DeviceType>::allocate() AngleHarmonic::allocate(); int n = atom->nangletypes; - k_k = DAT::tdual_ffloat_1d("AngleHarmonic::k",n+1); - k_theta0 = DAT::tdual_ffloat_1d("AngleHarmonic::theta0",n+1); + k_k = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleHarmonic::k",n+1); + k_theta0 = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleHarmonic::theta0",n+1); - d_k = k_k.d_view; - d_theta0 = k_theta0.d_view; + d_k = k_k.template view<DeviceType>(); + d_theta0 = k_theta0.template view<DeviceType>(); } /* ---------------------------------------------------------------------- @@ -285,8 +284,8 @@ void AngleHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const F_FLOAT v[6]; // The eatom and vatom arrays are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>(); + Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.template view<DeviceType>(); + Kokkos::View<F_FLOAT*[6], typename 
DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.template view<DeviceType>(); if (eflag_either) { if (eflag_global) { @@ -385,7 +384,10 @@ void AngleHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class AngleHarmonicKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class AngleHarmonicKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/angle_harmonic_kokkos.h b/src/KOKKOS/angle_harmonic_kokkos.h index 0fe906fbb7fe0e416cbae582cb356825c4c04e77..8e98cd6b7464d411a0f0d768b15ae392e2faa452 100755 --- a/src/KOKKOS/angle_harmonic_kokkos.h +++ b/src/KOKKOS/angle_harmonic_kokkos.h @@ -65,19 +65,19 @@ class AngleHarmonicKokkos : public AngleHarmonic { typename ArrayTypes<DeviceType>::t_f_array f; typename ArrayTypes<DeviceType>::t_int_2d anglelist; - DAT::tdual_efloat_1d k_eatom; - DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom; + typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom; + typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom; + typename ArrayTypes<DeviceType>::t_virial_array d_vatom; int nlocal,newton_bond; int eflag,vflag; - DAT::tdual_ffloat_1d k_k; - DAT::tdual_ffloat_1d k_theta0; + typename ArrayTypes<DeviceType>::tdual_ffloat_1d k_k; + typename ArrayTypes<DeviceType>::tdual_ffloat_1d k_theta0; - DAT::t_ffloat_1d d_k; - DAT::t_ffloat_1d d_theta0; + typename ArrayTypes<DeviceType>::t_ffloat_1d d_k; + typename ArrayTypes<DeviceType>::t_ffloat_1d d_theta0; virtual void allocate(); }; diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 39cc4beebcb0adfa902b27b8564664ea7dabbee2..0e2e6038c1a873dac7e9cb0ba648002211188597 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp 
@@ -88,6 +88,10 @@ void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) ((AtomVecKokkos *) avec)->modified(space,mask); } +void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask) +{ + ((AtomVecKokkos *) avec)->sync_overlapping_device(space,mask); +} /* ---------------------------------------------------------------------- */ void AtomKokkos::allocate_type_arrays() diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 9bcd07aa4fe45f35f7f9b8ecd9358cf7ed0979e8..05aae712d9cbebccfcff1a67c65fa10d7b751a0a 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -57,6 +57,7 @@ class AtomKokkos : public Atom { virtual void allocate_type_arrays(); void sync(const ExecutionSpace space, unsigned int mask); void modified(const ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(const ExecutionSpace space, unsigned int mask); virtual void sort(); virtual void grow(unsigned int mask); virtual void deallocate_topology(); diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index f07e4a9e66bfe46a37f29064cc1b806504ae32c0..6d063793e01407f7fb07b8577d0f67677fcb30dc 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -48,6 +48,7 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) k_count = DAT::tdual_int_1d("atom::k_count",1); atomKK = (AtomKokkos *) atom; commKK = (CommKokkos *) comm; + buffer = NULL; } /* ---------------------------------------------------------------------- @@ -1021,6 +1022,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1830,6 +1832,97 @@ void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) } } +void 
AtomVecAngleKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPDeviceType>()) + 
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPHostType>()) + 
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + } +} + /* ---------------------------------------------------------------------- */ void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask) @@ -1886,3 +1979,4 @@ void AtomVecAngleKokkos::modified(ExecutionSpace space, unsigned int mask) } } } + diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index 39874780f13add45909f7f761a06c439bb8c86a4..0a477b03866560f7b9175b8342b25f28c9cb814e 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -83,6 +83,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 4b50b9b9fd514a5ae66aac84d779a4ac8a3dc9fe..21170730773a82c15adff3a8d5a92cf7d4066fc5 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -896,6 +896,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1377,6 +1378,43 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ +void AtomVecAtomicKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + 
perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + } +} + +/* ---------------------------------------------------------------------- */ + void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask) { if (space == Device) { @@ -1397,3 +1435,4 @@ void AtomVecAtomicKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>(); } } + diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index 6e73f4ee3c706a1b1d5e3b62c39c5e5c96a55ede..0c3e24f9d9661303a51db514eb13f5ce416f9375 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -79,6 +79,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: tagint *tag; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 65ac756956509dcb7222f5250e39f947a818b31d..d42ec04834ac7f3e50a9cd967dcdf708654b2eb4 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -970,6 +970,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1673,6 +1674,75 @@ void 
AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ +void AtomVecBondKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + 
} + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + } +} + +/* ---------------------------------------------------------------------- */ + void AtomVecBondKokkos::modified(ExecutionSpace space, unsigned int mask) { if (space == Device) { @@ -1713,3 +1783,4 @@ void AtomVecBondKokkos::modified(ExecutionSpace space, 
unsigned int mask) } } } + diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index 1ff9d8fa4510a940b764bee2c4e31cfb7e5c08c2..e64017c99be6f8deb6a2055aeb611e310e4d9c0b 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -83,6 +83,7 @@ class AtomVecBondKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 87e9ac1cf4de58073d184a735eab23000e735d6c..4669dd8d7c8902975337c6978b98dc7565e92942 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -939,6 +939,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1330,11 +1331,10 @@ void AtomVecChargeKokkos::create_atom(int itype, double *coord) { int nlocal = atom->nlocal; if (nlocal == nmax) { - //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]); atomKK->modified(Host,ALL_MASK); grow(0); - //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]); } + atomKK->sync(Host,ALL_MASK); atomKK->modified(Host,ALL_MASK); tag[nlocal] = 0; @@ -1520,3 +1520,43 @@ void AtomVecChargeKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & Q_MASK) atomKK->k_q.modify<LMPHostType>(); } } + +void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + 
perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>()) /* k_q is the charge array, selected by Q_MASK */ + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>()) /* same Q_MASK guard for the host direction */ + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space); + } +} + diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h
b/src/KOKKOS/atom_vec_charge_kokkos.h index 46a05371aa11f2b16fe92ea56b3b113eac31bf48..38e32458c685d0ca5798f180dd177ed1cd2e9675 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -84,6 +84,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: tagint *tag; diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index d41a88bd88f95954996fdf3421c73daede34a635..7c1ebeed3a1ca6be2666a57642924c6f7f111363 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -1110,10 +1110,9 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf) m = 0; last = first + n; - if(n > 0) - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1140,7 +1139,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1879,7 +1878,9 @@ int AtomVecFullKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | + ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | 
BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1955,6 +1956,7 @@ void AtomVecFullKokkos::create_atom(int itype, double *coord) atomKK->modified(Host,ALL_MASK); grow(0); } + atomKK->sync(Host,ALL_MASK); atomKK->modified(Host,ALL_MASK); tag[nlocal] = 0; @@ -2246,6 +2248,159 @@ void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) /* ---------------------------------------------------------------------- */ +void AtomVecFullKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPDeviceType>()) + 
perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + if (mask & DIHEDRAL_MASK) { + if (atomKK->k_num_dihedral.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space); + if (atomKK->k_dihedral_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space); + if (atomKK->k_dihedral_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space); + if (atomKK->k_dihedral_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space); + if (atomKK->k_dihedral_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space); + if (atomKK->k_dihedral_atom4.need_sync<LMPDeviceType>()) /* fourth dihedral atom, synced like atom1-3 */ + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space); + } + if (mask & IMPROPER_MASK) { + if (atomKK->k_num_improper.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space); + if 
(atomKK->k_improper_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space); + if (atomKK->k_improper_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space); + if (atomKK->k_improper_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space); + if (atomKK->k_improper_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space); + if (atomKK->k_improper_atom4.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space); + } + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPHostType>()) + 
perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + if (mask & DIHEDRAL_MASK) { + if (atomKK->k_num_dihedral.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space); + if (atomKK->k_dihedral_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space); + if (atomKK->k_dihedral_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space); + if (atomKK->k_dihedral_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space); + if (atomKK->k_dihedral_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space); + if (atomKK->k_dihedral_atom4.need_sync<LMPHostType>()) + 
perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space); + } + if (mask & IMPROPER_MASK) { + if (atomKK->k_num_improper.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space); + if (atomKK->k_improper_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space); + if (atomKK->k_improper_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space); + if (atomKK->k_improper_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space); + if (atomKK->k_improper_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space); + if (atomKK->k_improper_atom4.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space); + } + } +} + +/* ---------------------------------------------------------------------- */ + void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask) { if (space == Device) { @@ -2334,3 +2489,4 @@ void AtomVecFullKokkos::modified(ExecutionSpace space, unsigned int mask) } } } + diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index b66d6b70f07cfb6dcf52c51d7d3ec9a4a19ac553..841707b33888e081ac9fcebef45dec1734c8ff50 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -83,6 +83,7 @@ class AtomVecFullKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 1d9174196a79583a4f4087446c8555d41403a348..5542991395157470204cf82b701c92c3908368d0 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -20,4 +20,7 @@ using namespace LAMMPS_NS;
AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) { kokkosable = 1; + buffer = NULL; + buffer_size = 0; } + diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index b10c4a21565d46bed04664cc8a9ca6a4e9099e68..fbeeaf96be3a6415e62901a2edbfb3231c331952 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -16,6 +16,7 @@ #include "atom_vec.h" #include "kokkos_type.h" +#include <type_traits> namespace LAMMPS_NS { @@ -26,6 +27,7 @@ class AtomVecKokkos : public AtomVec { virtual void sync(ExecutionSpace space, unsigned int mask) = 0; virtual void modified(ExecutionSpace space, unsigned int mask) = 0; + virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) {}; virtual int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, @@ -65,6 +67,73 @@ class AtomVecKokkos : public AtomVec { protected: class CommKokkos *commKK; + size_t buffer_size; + void* buffer; + + #ifdef KOKKOS_HAVE_CUDA + template<class ViewType> + Kokkos::View<typename ViewType::data_type, + typename ViewType::array_layout, + Kokkos::CudaHostPinnedSpace, + Kokkos::MemoryTraits<Kokkos::Unmanaged> > + create_async_copy(const ViewType& src) { + typedef Kokkos::View<typename ViewType::data_type, + typename ViewType::array_layout, + typename std::conditional< + std::is_same<typename ViewType::execution_space,LMPDeviceType>::value, + Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, + Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type; + if(buffer_size < src.capacity()) + buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,src.capacity()); + return mirror_type( buffer , + src.dimension_0() , + src.dimension_1() , + src.dimension_2() , + src.dimension_3() , + src.dimension_4() , + src.dimension_5() , + src.dimension_6() , + src.dimension_7() ); + } + + template<class ViewType> + void perform_async_copy(const ViewType& src, unsigned int space) { + typedef Kokkos::View<typename ViewType::data_type, + 
typename ViewType::array_layout, + typename std::conditional< + std::is_same<typename ViewType::execution_space,LMPDeviceType>::value, + Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, + Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type; + if(buffer_size < src.capacity()*sizeof(typename ViewType::value_type)) /* compare in bytes; buffer_size records the staging buffer so it is regrown only when too small */ + buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,buffer_size = src.capacity()*sizeof(typename ViewType::value_type)); + mirror_type tmp_view( (typename ViewType::value_type*)buffer , + src.dimension_0() , + src.dimension_1() , + src.dimension_2() , + src.dimension_3() , + src.dimension_4() , + src.dimension_5() , + src.dimension_6() , + src.dimension_7() ); + if(space == Device) { + Kokkos::deep_copy(LMPHostType(),tmp_view,src.h_view); + Kokkos::deep_copy(LMPHostType(),src.d_view,tmp_view); + src.modified_device() = src.modified_host(); + } else { + Kokkos::deep_copy(LMPHostType(),tmp_view,src.d_view); + Kokkos::deep_copy(LMPHostType(),src.h_view,tmp_view); + src.modified_device() = src.modified_host(); + } + } + #else + template<class ViewType> + void perform_async_copy(ViewType& src, unsigned int space) { + if(space == Device) + src.template sync<LMPDeviceType>(); + else + src.template sync<LMPHostType>(); + } + #endif }; } diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 848e146ac83e51b979b20e3c161fed732dcac244..7217ad49cbc4559e0cb1f591590a22a5d88c3bc3 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -1094,6 +1094,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -2149,6 +2150,153 @@ void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) } } +void 
AtomVecMolecularKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPDeviceType>()) + 
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + if (mask & DIHEDRAL_MASK) { + if (atomKK->k_num_dihedral.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space); + if (atomKK->k_dihedral_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space); + if (atomKK->k_dihedral_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space); + if (atomKK->k_dihedral_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space); + if (atomKK->k_dihedral_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space); + if (atomKK->k_dihedral_atom4.need_sync<LMPDeviceType>()) /* fourth dihedral atom, synced like atom1-3 */ + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space); + } + if (mask & IMPROPER_MASK) { + if (atomKK->k_num_improper.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space); + if (atomKK->k_improper_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space); + if (atomKK->k_improper_atom1.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space); + if (atomKK->k_improper_atom2.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space); + if (atomKK->k_improper_atom3.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space); + if 
(atomKK->k_improper_atom4.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space); + } + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space); + if (mask & SPECIAL_MASK) { + if (atomKK->k_nspecial.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_nspecial,space); + if (atomKK->k_special.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_special,space); + } + if (mask & BOND_MASK) { + if (atomKK->k_num_bond.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_bond,space); + if (atomKK->k_bond_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_bond_type,space); + if (atomKK->k_bond_atom.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_bond_atom,space); + } + if (mask & ANGLE_MASK) { + if (atomKK->k_num_angle.need_sync<LMPHostType>()) + 
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_angle,space); + if (atomKK->k_angle_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_angle_type,space); + if (atomKK->k_angle_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom1,space); + if (atomKK->k_angle_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom2,space); + if (atomKK->k_angle_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_angle_atom3,space); + } + if (mask & DIHEDRAL_MASK) { + if (atomKK->k_num_dihedral.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_dihedral,space); + if (atomKK->k_dihedral_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_dihedral_type,space); + if (atomKK->k_dihedral_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom1,space); + if (atomKK->k_dihedral_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom2,space); + if (atomKK->k_dihedral_atom3.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom3,space); + if (atomKK->k_dihedral_atom4.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_dihedral_atom4,space); + } + if (mask & IMPROPER_MASK) { + if (atomKK->k_num_improper.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_num_improper,space); + if (atomKK->k_improper_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_2d>(atomKK->k_improper_type,space); + if (atomKK->k_improper_atom1.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom1,space); + if (atomKK->k_improper_atom2.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom2,space); + if (atomKK->k_improper_atom3.need_sync<LMPHostType>()) 
+ perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom3,space); + if (atomKK->k_improper_atom4.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_2d>(atomKK->k_improper_atom4,space); + } + } +} + /* ---------------------------------------------------------------------- */ void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask) @@ -2237,3 +2385,4 @@ void AtomVecMolecularKokkos::modified(ExecutionSpace space, unsigned int mask) } } } + diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index 9d867006573779bc2e1e9a68f3239894e82e74ee..4ec26621cc8397f2a565027b564d128df25f25d3 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -83,6 +83,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos { void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); protected: diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index c8b643261a3146444a6f31f5ec582e80f2ed556e..7ff2e568d1c4c917c24fe050740788f22b008e7c 100755 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -379,7 +379,10 @@ void BondFENEKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class BondFENEKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class BondFENEKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index ba2ab19a0433f3204add21f802aab59f54a120c3..9b34786ec07be79892c8db776b6b9e9178900d72 100755 --- a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -67,26 +67,27 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int
vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom"); - d_eatom = k_eatom.d_view; + if(k_eatom.dimension_0()<maxeatom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom"); + d_eatom = k_eatom.d_view; + } } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom"); - d_vatom = k_vatom.d_view; + if(k_vatom.dimension_0()<maxvatom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom"); + d_vatom = k_vatom.d_view; + } } - atomKK->sync(execution_space,datamask_read); - k_k.template sync<DeviceType>(); - k_r0.template sync<DeviceType>(); - if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); - else atomKK->modified(execution_space,F_MASK); +// if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); +// else atomKK->modified(execution_space,F_MASK); - x = atomKK->k_x.view<DeviceType>(); - f = atomKK->k_f.view<DeviceType>(); + x = atomKK->k_x.template view<DeviceType>(); + f = atomKK->k_f.template view<DeviceType>(); neighborKK->k_bondlist.template sync<DeviceType>(); - bondlist = neighborKK->k_bondlist.view<DeviceType>(); + bondlist = neighborKK->k_bondlist.template view<DeviceType>(); int nbondlist = neighborKK->nbondlist; nlocal = atom->nlocal; newton_bond = force->newton_bond; @@ -110,7 +111,7 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this); } } - DeviceType::fence(); + //DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { @@ -140,9 +141,6 @@ template<int NEWTON_BOND, int EVFLAG> KOKKOS_INLINE_FUNCTION void BondHarmonicKokkos<DeviceType>::operator()(TagBondHarmonicCompute<NEWTON_BOND,EVFLAG>,
const int &n, EV_FLOAT& ev) const { - // The f array is atomic - Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f; - const int i1 = bondlist(n,0); const int i2 = bondlist(n,1); const int type = bondlist(n,2); @@ -168,15 +166,15 @@ void BondHarmonicKokkos<DeviceType>::operator()(TagBondHarmonicCompute<NEWTON_BO // apply force to each of 2 atoms if (NEWTON_BOND || i1 < nlocal) { - a_f(i1,0) += delx*fbond; - a_f(i1,1) += dely*fbond; - a_f(i1,2) += delz*fbond; + f(i1,0) += delx*fbond; + f(i1,1) += dely*fbond; + f(i1,2) += delz*fbond; } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) -= delx*fbond; - a_f(i2,1) -= dely*fbond; - a_f(i2,2) -= delz*fbond; + f(i2,0) -= delx*fbond; + f(i2,1) -= dely*fbond; + f(i2,2) -= delz*fbond; } if (EVFLAG) ev_tally(ev,i1,i2,ebond,fbond,delx,dely,delz); @@ -196,13 +194,6 @@ template<class DeviceType> void BondHarmonicKokkos<DeviceType>::allocate() { BondHarmonic::allocate(); - - int n = atom->nbondtypes; - k_k = DAT::tdual_ffloat_1d("BondHarmonic::k",n+1); - k_r0 = DAT::tdual_ffloat_1d("BondHarmonic::r0",n+1); - - d_k = k_k.d_view; - d_r0 = k_r0.d_view; } /* ---------------------------------------------------------------------- @@ -215,6 +206,12 @@ void BondHarmonicKokkos<DeviceType>::coeff(int narg, char **arg) BondHarmonic::coeff(narg, arg); int n = atom->nbondtypes; + Kokkos::DualView<F_FLOAT*,DeviceType> k_k("BondHarmonic::k",n+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_r0("BondHarmonic::r0",n+1); + + d_k = k_k.template view<DeviceType>(); + d_r0 = k_r0.template view<DeviceType>(); + for (int i = 1; i <= n; i++) { k_k.h_view[i] = k[i]; k_r0.h_view[i] = r0[i]; @@ -222,6 +219,9 @@ void BondHarmonicKokkos<DeviceType>::coeff(int narg, char **arg) k_k.template modify<LMPHostType>(); k_r0.template modify<LMPHostType>(); + k_k.template sync<DeviceType>(); + k_r0.template sync<DeviceType>(); + } /* 
---------------------------------------------------------------------- @@ -238,10 +238,6 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const E_FLOAT ebondhalf; F_FLOAT v[6]; - // The eatom and vatom arrays are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>(); - if (eflag_either) { if (eflag_global) { if (newton_bond) ev.evdwl += ebond; @@ -253,8 +249,8 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const } if (eflag_atom) { ebondhalf = 0.5*ebond; - if (newton_bond || i < nlocal) v_eatom[i] += ebondhalf; - if (newton_bond || j < nlocal) v_eatom[j] += ebondhalf; + if (newton_bond || i < nlocal) d_eatom[i] += ebondhalf; + if (newton_bond || j < nlocal) d_eatom[j] += ebondhalf; } } @@ -296,20 +292,20 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const if (vflag_atom) { if (newton_bond || i < nlocal) { - v_vatom(i,0) += 0.5*v[0]; - v_vatom(i,1) += 0.5*v[1]; - v_vatom(i,2) += 0.5*v[2]; - v_vatom(i,3) += 0.5*v[3]; - v_vatom(i,4) += 0.5*v[4]; - v_vatom(i,5) += 0.5*v[5]; + d_vatom(i,0) += 0.5*v[0]; + d_vatom(i,1) += 0.5*v[1]; + d_vatom(i,2) += 0.5*v[2]; + d_vatom(i,3) += 0.5*v[3]; + d_vatom(i,4) += 0.5*v[4]; + d_vatom(i,5) += 0.5*v[5]; } if (newton_bond || j < nlocal) { - v_vatom(j,0) += 0.5*v[0]; - v_vatom(j,1) += 0.5*v[1]; - v_vatom(j,2) += 0.5*v[2]; - v_vatom(j,3) += 0.5*v[3]; - v_vatom(j,4) += 0.5*v[4]; - v_vatom(j,5) += 0.5*v[5]; + d_vatom(j,0) += 0.5*v[0]; + d_vatom(j,1) += 0.5*v[1]; + d_vatom(j,2) += 0.5*v[2]; + d_vatom(j,3) += 0.5*v[3]; + d_vatom(j,4) += 0.5*v[4]; + d_vatom(j,5) += 0.5*v[5]; } } } @@ -317,7 +313,10 @@ void BondHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, 
const int &i, const /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class BondHarmonicKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class BondHarmonicKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/bond_harmonic_kokkos.h b/src/KOKKOS/bond_harmonic_kokkos.h index f2d692c9cc24825569c84d85485fb806f4354761..dc42fbcd5c509623f892213efd493dad62055c14 100755 --- a/src/KOKKOS/bond_harmonic_kokkos.h +++ b/src/KOKKOS/bond_harmonic_kokkos.h @@ -60,23 +60,21 @@ class BondHarmonicKokkos : public BondHarmonic { class NeighborKokkos *neighborKK; - typename ArrayTypes<DeviceType>::t_x_array_randomread x; - typename ArrayTypes<DeviceType>::t_f_array f; - typename ArrayTypes<DeviceType>::t_int_2d bondlist; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread x; + typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f; + typename AT::t_int_2d bondlist; - DAT::tdual_efloat_1d k_eatom; - DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom; + Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom; + Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom; + Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom; int nlocal,newton_bond; int eflag,vflag; - DAT::tdual_ffloat_1d k_k; - DAT::tdual_ffloat_1d k_r0; - - DAT::t_ffloat_1d d_k; - DAT::t_ffloat_1d d_r0; + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_r0; virtual void allocate(); }; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index eec7f91ee2e0f3946d98ae1135644bf9fd21b3b9..9b3064f219c09bf6b0ed8b6f073acaf37b8a46dc 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp 
@@ -105,9 +105,9 @@ void CommKokkos::init() int check_forward = 0; int check_reverse = 0; - if (force->pair && !force->pair->execution_space == Device) + if (force->pair && (force->pair->execution_space == Host)) check_forward += force->pair->comm_forward; - if (force->pair && !force->pair->execution_space == Device) + if (force->pair && (force->pair->execution_space == Host)) check_reverse += force->pair->comm_reverse; for (int i = 0; i < modify->nfix; i++) { @@ -995,3 +995,4 @@ void CommKokkos::grow_swap(int n) memory->grow(maxsendlist,n,"comm:maxsendlist"); for (int i=0;i<maxswap;i++) maxsendlist[i]=size; } + diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index eeb5bdcf003d8ecaafaa1999c3f5894cda51cadf..6a24591d6cbb66842df6cddefeef873f8f78203b 100755 --- a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -149,7 +149,10 @@ void ComputeTempKokkos<DeviceType>::operator()(TagComputeTempVector<RMASS>, cons } } +namespace LAMMPS_NS { template class ComputeTempKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class ComputeTempKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index 6042a1a75b23b0b6cc1fbc2d579630bf9bb36e2e..3ae37993c100eb6c9b69bbef1223fe5c5ecfe440 100755 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -45,8 +45,8 @@ DihedralCharmmKokkos<DeviceType>::DihedralCharmmKokkos(LAMMPS *lmp) : DihedralCh datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; - k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag"); - d_warning_flag = k_warning_flag.view<DeviceType>(); + k_warning_flag = Kokkos::DualView<int,DeviceType>("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.template view<DeviceType>(); h_warning_flag = k_warning_flag.h_view; } @@ 
-80,30 +80,26 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); - d_eatom = k_eatom.d_view; + if(k_eatom.dimension_0()<maxeatom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + d_eatom = k_eatom.d_view; + k_eatom_pair = Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType>("dihedral:eatom_pair",maxeatom); + d_eatom_pair = k_eatom_pair.d_view; /* view the pair buffer allocated just above, not k_eatom */ + } } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); - d_vatom = k_vatom.d_view; + if(k_vatom.dimension_0()<maxvatom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); + d_vatom = k_vatom.d_view; + k_vatom_pair = Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType>("dihedral:vatom_pair",maxvatom); + d_vatom_pair = k_vatom_pair.d_view; /* view the pair buffer allocated just above, not k_vatom */ + } } - k_eatom_pair = DAT::tdual_efloat_1d("dihedral:eatom_pair",maxeatom); - k_vatom_pair = DAT::tdual_virial_array("dihedral:vatom_pair",maxvatom); - atomKK->sync(execution_space,datamask_read); - k_lj14_1.template sync<DeviceType>(); - k_lj14_2.template sync<DeviceType>(); - k_lj14_3.template sync<DeviceType>(); - k_lj14_4.template sync<DeviceType>(); - k_k.template sync<DeviceType>(); - k_multiplicity.template sync<DeviceType>(); - k_shift.template sync<DeviceType>(); - k_cos_shift.template sync<DeviceType>(); - k_sin_shift.template sync<DeviceType>(); - k_weight.template sync<DeviceType>(); + //atomKK->sync(execution_space,datamask_read); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -423,25 +419,24 @@ template<class DeviceType> void DihedralCharmmKokkos<DeviceType>::allocate() { DihedralCharmm::allocate(); +} - int n = 
atom->ntypes; - k_lj14_1 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_1",n+1,n+1); - k_lj14_2 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_2",n+1,n+1); - k_lj14_3 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_3",n+1,n+1); - k_lj14_4 = DAT::tdual_ffloat_2d("DihedralCharmm:lj14_4",n+1,n+1); +/* ---------------------------------------------------------------------- + set coeffs for one or more types +------------------------------------------------------------------------- */ - d_lj14_1 = k_lj14_1.d_view; - d_lj14_2 = k_lj14_2.d_view; - d_lj14_3 = k_lj14_3.d_view; - d_lj14_4 = k_lj14_4.d_view; +template<class DeviceType> +void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg) +{ + DihedralCharmm::coeff(narg, arg); int nd = atom->ndihedraltypes; - k_k = DAT::tdual_ffloat_1d("DihedralCharmm::k",nd+1); - k_multiplicity = DAT::tdual_ffloat_1d("DihedralCharmm::multiplicity",nd+1); - k_shift = DAT::tdual_ffloat_1d("DihedralCharmm::shift",nd+1); - k_cos_shift = DAT::tdual_ffloat_1d("DihedralCharmm::cos_shift",nd+1); - k_sin_shift = DAT::tdual_ffloat_1d("DihedralCharmm::sin_shift",nd+1); - k_weight = DAT::tdual_ffloat_1d("DihedralCharmm::weight",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_k("DihedralCharmm::k",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_multiplicity("DihedralCharmm::multiplicity",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_shift("DihedralCharmm::shift",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_cos_shift("DihedralCharmm::cos_shift",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_sin_shift("DihedralCharmm::sin_shift",nd+1); + Kokkos::DualView<F_FLOAT*,DeviceType> k_weight("DihedralCharmm::weight",nd+1); d_k = k_k.d_view; d_multiplicity = k_multiplicity.d_view; @@ -449,16 +444,6 @@ void DihedralCharmmKokkos<DeviceType>::allocate() d_cos_shift = k_cos_shift.d_view; d_sin_shift = k_sin_shift.d_view; d_weight = k_weight.d_view; -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more 
types -------------------------------------------------------------------------- */ - -template<class DeviceType> -void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg) -{ - DihedralCharmm::coeff(narg, arg); int n = atom->ndihedraltypes; for (int i = 1; i <= n; i++) { @@ -476,6 +461,13 @@ void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg) k_cos_shift.template modify<LMPHostType>(); k_sin_shift.template modify<LMPHostType>(); k_weight.template modify<LMPHostType>(); + + k_k.template sync<DeviceType>(); + k_multiplicity.template sync<DeviceType>(); + k_shift.template sync<DeviceType>(); + k_cos_shift.template sync<DeviceType>(); + k_sin_shift.template sync<DeviceType>(); + k_weight.template sync<DeviceType>(); } /* ---------------------------------------------------------------------- @@ -487,6 +479,18 @@ void DihedralCharmmKokkos<DeviceType>::init_style() { DihedralCharmm::init_style(); + int n = atom->ntypes; + Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_1("DihedralCharmm:lj14_1",n+1,n+1); + Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_2("DihedralCharmm:lj14_2",n+1,n+1); + Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_3("DihedralCharmm:lj14_3",n+1,n+1); + Kokkos::DualView<F_FLOAT**,Kokkos::LayoutRight,DeviceType> k_lj14_4("DihedralCharmm:lj14_4",n+1,n+1); + + d_lj14_1 = k_lj14_1.d_view; + d_lj14_2 = k_lj14_2.d_view; + d_lj14_3 = k_lj14_3.d_view; + d_lj14_4 = k_lj14_4.d_view; + + if (weightflag) { int n = atom->ntypes; for (int i = 1; i <= n; i++) { @@ -503,6 +507,12 @@ void DihedralCharmmKokkos<DeviceType>::init_style() k_lj14_2.template modify<LMPHostType>(); k_lj14_3.template modify<LMPHostType>(); k_lj14_4.template modify<LMPHostType>(); + + k_lj14_1.template sync<DeviceType>(); + k_lj14_2.template sync<DeviceType>(); + k_lj14_3.template sync<DeviceType>(); + k_lj14_4.template sync<DeviceType>(); + } /* 
---------------------------------------------------------------------- @@ -524,10 +534,6 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co E_FLOAT edihedralquarter; F_FLOAT v[6]; - // The eatom and vatom arrays are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>(); - if (eflag_either) { if (eflag_global) { if (newton_bond) evm.emol += edihedral; @@ -541,10 +547,10 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co } if (eflag_atom) { edihedralquarter = 0.25*edihedral; - if (newton_bond || i1 < nlocal) v_eatom[i1] += edihedralquarter; - if (newton_bond || i2 < nlocal) v_eatom[i2] += edihedralquarter; - if (newton_bond || i3 < nlocal) v_eatom[i3] += edihedralquarter; - if (newton_bond || i4 < nlocal) v_eatom[i4] += edihedralquarter; + if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter; + if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter; + if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter; + if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter; } } @@ -602,36 +608,36 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, co if (vflag_atom) { if (newton_bond || i1 < nlocal) { - v_vatom(i1,0) += 0.25*v[0]; - v_vatom(i1,1) += 0.25*v[1]; - v_vatom(i1,2) += 0.25*v[2]; - v_vatom(i1,3) += 0.25*v[3]; - v_vatom(i1,4) += 0.25*v[4]; - v_vatom(i1,5) += 0.25*v[5]; + d_vatom(i1,0) += 0.25*v[0]; + d_vatom(i1,1) += 0.25*v[1]; + d_vatom(i1,2) += 0.25*v[2]; + d_vatom(i1,3) += 0.25*v[3]; + d_vatom(i1,4) += 0.25*v[4]; + d_vatom(i1,5) += 0.25*v[5]; } if (newton_bond || i2 < nlocal) { - v_vatom(i2,0) += 0.25*v[0]; - v_vatom(i2,1) += 0.25*v[1]; - 
v_vatom(i2,2) += 0.25*v[2]; - v_vatom(i2,3) += 0.25*v[3]; - v_vatom(i2,4) += 0.25*v[4]; - v_vatom(i2,5) += 0.25*v[5]; + d_vatom(i2,0) += 0.25*v[0]; + d_vatom(i2,1) += 0.25*v[1]; + d_vatom(i2,2) += 0.25*v[2]; + d_vatom(i2,3) += 0.25*v[3]; + d_vatom(i2,4) += 0.25*v[4]; + d_vatom(i2,5) += 0.25*v[5]; } if (newton_bond || i3 < nlocal) { - v_vatom(i3,0) += 0.25*v[0]; - v_vatom(i3,1) += 0.25*v[1]; - v_vatom(i3,2) += 0.25*v[2]; - v_vatom(i3,3) += 0.25*v[3]; - v_vatom(i3,4) += 0.25*v[4]; - v_vatom(i3,5) += 0.25*v[5]; + d_vatom(i3,0) += 0.25*v[0]; + d_vatom(i3,1) += 0.25*v[1]; + d_vatom(i3,2) += 0.25*v[2]; + d_vatom(i3,3) += 0.25*v[3]; + d_vatom(i3,4) += 0.25*v[4]; + d_vatom(i3,5) += 0.25*v[5]; } if (newton_bond || i4 < nlocal) { - v_vatom(i4,0) += 0.25*v[0]; - v_vatom(i4,1) += 0.25*v[1]; - v_vatom(i4,2) += 0.25*v[2]; - v_vatom(i4,3) += 0.25*v[3]; - v_vatom(i4,4) += 0.25*v[4]; - v_vatom(i4,5) += 0.25*v[5]; + d_vatom(i4,0) += 0.25*v[0]; + d_vatom(i4,1) += 0.25*v[1]; + d_vatom(i4,2) += 0.25*v[2]; + d_vatom(i4,3) += 0.25*v[3]; + d_vatom(i4,4) += 0.25*v[4]; + d_vatom(i4,5) += 0.25*v[5]; } } } @@ -651,9 +657,6 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con E_FLOAT evdwlhalf,ecoulhalf,epairhalf; F_FLOAT v[6]; - // The eatom and vatom arrays are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom_pair = k_eatom_pair.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom_pair = k_vatom_pair.view<DeviceType>(); if (eflag_either) { if (eflag_global) { @@ -675,8 +678,8 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con } if (eflag_atom) { epairhalf = 0.5 * (evdwl + ecoul); - if (newton_bond || i < nlocal) v_eatom_pair[i] += epairhalf; - if (newton_bond || j < nlocal) v_eatom_pair[j] += epairhalf; + if 
(newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf; + if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf; } } @@ -718,20 +721,20 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con if (vflag_atom) { if (newton_bond || i < nlocal) { - v_vatom_pair(i,0) += 0.5*v[0]; - v_vatom_pair(i,1) += 0.5*v[1]; - v_vatom_pair(i,2) += 0.5*v[2]; - v_vatom_pair(i,3) += 0.5*v[3]; - v_vatom_pair(i,4) += 0.5*v[4]; - v_vatom_pair(i,5) += 0.5*v[5]; + d_vatom_pair(i,0) += 0.5*v[0]; + d_vatom_pair(i,1) += 0.5*v[1]; + d_vatom_pair(i,2) += 0.5*v[2]; + d_vatom_pair(i,3) += 0.5*v[3]; + d_vatom_pair(i,4) += 0.5*v[4]; + d_vatom_pair(i,5) += 0.5*v[5]; } if (newton_bond || j < nlocal) { - v_vatom_pair(j,0) += 0.5*v[0]; - v_vatom_pair(j,1) += 0.5*v[1]; - v_vatom_pair(j,2) += 0.5*v[2]; - v_vatom_pair(j,3) += 0.5*v[3]; - v_vatom_pair(j,4) += 0.5*v[4]; - v_vatom_pair(j,5) += 0.5*v[5]; + d_vatom_pair(j,0) += 0.5*v[0]; + d_vatom_pair(j,1) += 0.5*v[1]; + d_vatom_pair(j,2) += 0.5*v[2]; + d_vatom_pair(j,3) += 0.5*v[3]; + d_vatom_pair(j,4) += 0.5*v[4]; + d_vatom_pair(j,5) += 0.5*v[5]; } } } @@ -739,7 +742,10 @@ void DihedralCharmmKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, con /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class DihedralCharmmKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class DihedralCharmmKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/dihedral_charmm_kokkos.h b/src/KOKKOS/dihedral_charmm_kokkos.h index 2ebae7e98541b0e3fbabfba98c97f93d76b3e438..2ee596353a6a7a6e99d6420f90e1d2f4fa441231 100755 --- a/src/KOKKOS/dihedral_charmm_kokkos.h +++ b/src/KOKKOS/dihedral_charmm_kokkos.h @@ -131,47 +131,35 @@ class DihedralCharmmKokkos : public DihedralCharmm { typename AT::t_f_array f; typename AT::t_int_2d dihedrallist; - DAT::tdual_efloat_1d k_eatom; - DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - 
DAT::t_virial_array d_vatom; + Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom; + Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom; + Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom; + Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom; - DAT::tdual_efloat_1d k_eatom_pair; - DAT::tdual_virial_array k_vatom_pair; - DAT::t_efloat_1d d_eatom_pair; - DAT::t_virial_array d_vatom_pair; + Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom_pair; + Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom_pair; + Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom_pair; + Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom_pair; int nlocal,newton_bond; int eflag,vflag; double qqrd2e; - DAT::tdual_int_scalar k_warning_flag; - typename AT::t_int_scalar d_warning_flag; - HAT::t_int_scalar h_warning_flag; - - DAT::tdual_ffloat_2d k_lj14_1; - DAT::tdual_ffloat_2d k_lj14_2; - DAT::tdual_ffloat_2d k_lj14_3; - DAT::tdual_ffloat_2d k_lj14_4; - - DAT::t_ffloat_2d d_lj14_1; - DAT::t_ffloat_2d d_lj14_2; - DAT::t_ffloat_2d d_lj14_3; - DAT::t_ffloat_2d d_lj14_4; - - DAT::tdual_ffloat_1d k_k; - DAT::tdual_ffloat_1d k_multiplicity; - DAT::tdual_ffloat_1d k_shift; - DAT::tdual_ffloat_1d k_sin_shift; - DAT::tdual_ffloat_1d k_cos_shift; - DAT::tdual_ffloat_1d k_weight; - - DAT::t_ffloat_1d d_k; - DAT::t_ffloat_1d d_multiplicity; - DAT::t_ffloat_1d d_shift; - DAT::t_ffloat_1d d_sin_shift; - DAT::t_ffloat_1d d_cos_shift; - DAT::t_ffloat_1d d_weight; + Kokkos::DualView<int,DeviceType> k_warning_flag; + typename Kokkos::DualView<int,DeviceType>::t_dev d_warning_flag; + typename Kokkos::DualView<int,DeviceType>::t_host h_warning_flag; + + typename AT::t_ffloat_2d d_lj14_1; + typename AT::t_ffloat_2d d_lj14_2; + typename AT::t_ffloat_2d 
d_lj14_3; + typename AT::t_ffloat_2d d_lj14_4; + + typename AT::t_ffloat_1d d_k; + typename AT::t_ffloat_1d d_multiplicity; + typename AT::t_ffloat_1d d_shift; + typename AT::t_ffloat_1d d_sin_shift; + typename AT::t_ffloat_1d d_cos_shift; + typename AT::t_ffloat_1d d_weight; virtual void allocate(); }; diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp index 608e8d97545b370b2a014867ac4eb6538e437c81..ff28391ec3c3141cb7d742b149b468733776dc1b 100755 --- a/src/KOKKOS/dihedral_opls_kokkos.cpp +++ b/src/KOKKOS/dihedral_opls_kokkos.cpp @@ -515,7 +515,10 @@ void DihedralOPLSKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, const /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class DihedralOPLSKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class DihedralOPLSKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index 1c88836afda5021860ef407c0c0ff720f67b32f6..cf65316ec9b34517b74086a550e8fb2e8342aebc 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -600,3 +600,4 @@ void DomainKokkos::operator()(TagDomain_x2lamda, const int &i) const { x(i,1) = h_inv[1]*delta[1] + h_inv[3]*delta[2]; x(i,2) = h_inv[2]*delta[2]; } + diff --git a/src/KOKKOS/fix_deform_kokkos.cpp b/src/KOKKOS/fix_deform_kokkos.cpp index 0050fdc5e43f0a3aab38834643e616c7d912213f..b3316022f1076afe074fa4858729c70484b56bc1 100755 --- a/src/KOKKOS/fix_deform_kokkos.cpp +++ b/src/KOKKOS/fix_deform_kokkos.cpp @@ -372,3 +372,4 @@ void FixDeformKokkos::end_of_step() if (kspace_flag) force->kspace->setup(); } + diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index e7dc6f9ba217aa810b63b778ca3a94a58b5f3a6f..de8920b55690d5a437a92982dd270819e0548218 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -804,7 +804,10 @@ void 
FixLangevinKokkos<DeviceType>::cleanup_copy() vatom = NULL; } +namespace LAMMPS_NS { template class FixLangevinKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixLangevinKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index d8225b90b5afb4d2282b7514a9ec4e9011da7388..28b7ff55b6db5a9b146040307d33ed24339a5770 100755 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -732,7 +732,10 @@ void FixNHKokkos<DeviceType>::pre_exchange() } } +namespace LAMMPS_NS { template class FixNHKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixNHKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/fix_nph_kokkos.cpp b/src/KOKKOS/fix_nph_kokkos.cpp index 08a7ea166d0f8a7cd255893082aabcd54f852e2f..350832a3d70d908f2346336dc230e9a91a3eeee6 100755 --- a/src/KOKKOS/fix_nph_kokkos.cpp +++ b/src/KOKKOS/fix_nph_kokkos.cpp @@ -68,7 +68,10 @@ FixNPHKokkos<DeviceType>::FixNPHKokkos(LAMMPS *lmp, int narg, char **arg) : this->pflag = 1; } +namespace LAMMPS_NS { template class FixNPHKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixNPHKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/fix_npt_kokkos.cpp b/src/KOKKOS/fix_npt_kokkos.cpp index 6ae0add69065d70ce9f9997cd8232cc2a6a3d27c..3832fb3e5771dd796e24bd53fe2e620c14f9232a 100755 --- a/src/KOKKOS/fix_npt_kokkos.cpp +++ b/src/KOKKOS/fix_npt_kokkos.cpp @@ -68,7 +68,10 @@ FixNPTKokkos<DeviceType>::FixNPTKokkos(LAMMPS *lmp, int narg, char **arg) : this->pflag = 1; } +namespace LAMMPS_NS { template class FixNPTKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixNPTKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index aa7698badcafff22b2c8f2e5473c09d5ceef256d..5e7e9b5298943331ee1863ab8eee723778bdb55a 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -171,7 
+171,10 @@ void FixNVEKokkos<DeviceType>::cleanup_copy() vatom = NULL; } +namespace LAMMPS_NS { template class FixNVEKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixNVEKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/fix_nvt_kokkos.cpp b/src/KOKKOS/fix_nvt_kokkos.cpp index 43fa07ea5a64b5b23f462ae081c1ddc644f5a703..bf6ce0de2b4f6ba3eb1d1cc7461716287d7d46ea 100755 --- a/src/KOKKOS/fix_nvt_kokkos.cpp +++ b/src/KOKKOS/fix_nvt_kokkos.cpp @@ -49,7 +49,10 @@ FixNVTKokkos<DeviceType>::FixNVTKokkos(LAMMPS *lmp, int narg, char **arg) : this->tflag = 1; } +namespace LAMMPS_NS { template class FixNVTKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixNVTKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index 5162b81b6716785f1a6b055aed739816822a2784..862aab1e21761233b1f2ba004af2f5037cc9d446 100755 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -179,7 +179,10 @@ void FixSetForceKokkos<DeviceType>::operator()(TagFixSetForceNonConstant, const } } +namespace LAMMPS_NS { template class FixSetForceKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixSetForceKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp index cd6501d50ed3e10c0f4e6db08569c2c92f560fb8..27c0080234261ad1034b99ab5da6f7c764f82d72 100755 --- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp +++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp @@ -105,7 +105,10 @@ void FixWallReflectKokkos<DeviceType>::operator()(TagFixWallReflectPostIntegrate /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class FixWallReflectKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class FixWallReflectKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp 
b/src/KOKKOS/improper_harmonic_kokkos.cpp index f614b3dbc2f9925034e880c5d9cc8df25623f103..34d3d437d6178513e97d45aef0b1f106a61ebe48 100755 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -47,8 +47,8 @@ ImproperHarmonicKokkos<DeviceType>::ImproperHarmonicKokkos(LAMMPS *lmp) : Improp datamask_read = X_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; - k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag"); - d_warning_flag = k_warning_flag.view<DeviceType>(); + k_warning_flag = Kokkos::DualView<int,DeviceType>("Dihedral:warning_flag"); + d_warning_flag = k_warning_flag.template view<DeviceType>(); h_warning_flag = k_warning_flag.h_view; } @@ -77,17 +77,21 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); - d_eatom = k_eatom.d_view; + if(k_eatom.dimension_0()<maxeatom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + d_eatom = k_eatom.d_view; + } } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); - d_vatom = k_vatom.d_view; + if(k_vatom.dimension_0()<maxvatom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + d_vatom = k_vatom.d_view; + } } - atomKK->sync(execution_space,datamask_read); + //atomKK->sync(execution_space,datamask_read); k_k.template sync<DeviceType>(); k_chi.template sync<DeviceType>(); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); @@ -124,7 +128,7 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> 
>(0,nimproperlist),*this); } } - DeviceType::fence(); + //DeviceType::fence(); // error check @@ -161,9 +165,6 @@ template<int NEWTON_BOND, int EVFLAG> KOKKOS_INLINE_FUNCTION void ImproperHarmonicKokkos<DeviceType>::operator()(TagImproperHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const { - // The f array is atomic - Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f; - const int i1 = improperlist(n,0); const int i2 = improperlist(n,1); const int i3 = improperlist(n,2); @@ -262,27 +263,27 @@ void ImproperHarmonicKokkos<DeviceType>::operator()(TagImproperHarmonicCompute<N // apply force to each of 4 atoms if (NEWTON_BOND || i1 < nlocal) { - a_f(i1,0) += f1[0]; - a_f(i1,1) += f1[1]; - a_f(i1,2) += f1[2]; + f(i1,0) += f1[0]; + f(i1,1) += f1[1]; + f(i1,2) += f1[2]; } if (NEWTON_BOND || i2 < nlocal) { - a_f(i2,0) += f2[0]; - a_f(i2,1) += f2[1]; - a_f(i2,2) += f2[2]; + f(i2,0) += f2[0]; + f(i2,1) += f2[1]; + f(i2,2) += f2[2]; } if (NEWTON_BOND || i3 < nlocal) { - a_f(i3,0) += f3[0]; - a_f(i3,1) += f3[1]; - a_f(i3,2) += f3[2]; + f(i3,0) += f3[0]; + f(i3,1) += f3[1]; + f(i3,2) += f3[2]; } if (NEWTON_BOND || i4 < nlocal) { - a_f(i4,0) += f4[0]; - a_f(i4,1) += f4[1]; - a_f(i4,2) += f4[2]; + f(i4,0) += f4[0]; + f(i4,1) += f4[1]; + f(i4,2) += f4[2]; } if (EVFLAG) @@ -306,8 +307,8 @@ void ImproperHarmonicKokkos<DeviceType>::allocate() ImproperHarmonic::allocate(); int n = atom->nimpropertypes; - k_k = DAT::tdual_ffloat_1d("ImproperHarmonic::k",n+1); - k_chi = DAT::tdual_ffloat_1d("ImproperHarmonic::chi",n+1); + k_k = Kokkos::DualView<F_FLOAT*,DeviceType>("ImproperHarmonic::k",n+1); + k_chi = Kokkos::DualView<F_FLOAT*,DeviceType>("ImproperHarmonic::chi",n+1); d_k = k_k.d_view; d_chi = k_chi.d_view; @@ -351,9 +352,6 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co E_FLOAT eimproperquarter; F_FLOAT v[6]; - // The eatom and vatom arrays 
are atomic - Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>(); - Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>(); if (eflag_either) { if (eflag_global) { @@ -368,10 +366,10 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co } if (eflag_atom) { eimproperquarter = 0.25*eimproper; - if (newton_bond || i1 < nlocal) v_eatom[i1] += eimproperquarter; - if (newton_bond || i2 < nlocal) v_eatom[i2] += eimproperquarter; - if (newton_bond || i3 < nlocal) v_eatom[i3] += eimproperquarter; - if (newton_bond || i4 < nlocal) v_eatom[i4] += eimproperquarter; + if (newton_bond || i1 < nlocal) d_eatom[i1] += eimproperquarter; + if (newton_bond || i2 < nlocal) d_eatom[i2] += eimproperquarter; + if (newton_bond || i3 < nlocal) d_eatom[i3] += eimproperquarter; + if (newton_bond || i4 < nlocal) d_eatom[i4] += eimproperquarter; } } @@ -429,36 +427,36 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co if (vflag_atom) { if (newton_bond || i1 < nlocal) { - v_vatom(i1,0) += 0.25*v[0]; - v_vatom(i1,1) += 0.25*v[1]; - v_vatom(i1,2) += 0.25*v[2]; - v_vatom(i1,3) += 0.25*v[3]; - v_vatom(i1,4) += 0.25*v[4]; - v_vatom(i1,5) += 0.25*v[5]; + d_vatom(i1,0) += 0.25*v[0]; + d_vatom(i1,1) += 0.25*v[1]; + d_vatom(i1,2) += 0.25*v[2]; + d_vatom(i1,3) += 0.25*v[3]; + d_vatom(i1,4) += 0.25*v[4]; + d_vatom(i1,5) += 0.25*v[5]; } if (newton_bond || i2 < nlocal) { - v_vatom(i2,0) += 0.25*v[0]; - v_vatom(i2,1) += 0.25*v[1]; - v_vatom(i2,2) += 0.25*v[2]; - v_vatom(i2,3) += 0.25*v[3]; - v_vatom(i2,4) += 0.25*v[4]; - v_vatom(i2,5) += 0.25*v[5]; + d_vatom(i2,0) += 0.25*v[0]; + d_vatom(i2,1) += 0.25*v[1]; + d_vatom(i2,2) += 0.25*v[2]; + d_vatom(i2,3) += 0.25*v[3]; + d_vatom(i2,4) += 0.25*v[4]; + d_vatom(i2,5) += 
0.25*v[5]; } if (newton_bond || i3 < nlocal) { - v_vatom(i3,0) += 0.25*v[0]; - v_vatom(i3,1) += 0.25*v[1]; - v_vatom(i3,2) += 0.25*v[2]; - v_vatom(i3,3) += 0.25*v[3]; - v_vatom(i3,4) += 0.25*v[4]; - v_vatom(i3,5) += 0.25*v[5]; + d_vatom(i3,0) += 0.25*v[0]; + d_vatom(i3,1) += 0.25*v[1]; + d_vatom(i3,2) += 0.25*v[2]; + d_vatom(i3,3) += 0.25*v[3]; + d_vatom(i3,4) += 0.25*v[4]; + d_vatom(i3,5) += 0.25*v[5]; } if (newton_bond || i4 < nlocal) { - v_vatom(i4,0) += 0.25*v[0]; - v_vatom(i4,1) += 0.25*v[1]; - v_vatom(i4,2) += 0.25*v[2]; - v_vatom(i4,3) += 0.25*v[3]; - v_vatom(i4,4) += 0.25*v[4]; - v_vatom(i4,5) += 0.25*v[5]; + d_vatom(i4,0) += 0.25*v[0]; + d_vatom(i4,1) += 0.25*v[1]; + d_vatom(i4,2) += 0.25*v[2]; + d_vatom(i4,3) += 0.25*v[3]; + d_vatom(i4,4) += 0.25*v[4]; + d_vatom(i4,5) += 0.25*v[5]; } } } @@ -466,7 +464,10 @@ void ImproperHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, co /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class ImproperHarmonicKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class ImproperHarmonicKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/improper_harmonic_kokkos.h b/src/KOKKOS/improper_harmonic_kokkos.h index dc7a12af4d25ccfcb00345b78cc465c41f12d497..c83df68d5e54b91322b3a0f363a39cbaac809f2c 100755 --- a/src/KOKKOS/improper_harmonic_kokkos.h +++ b/src/KOKKOS/improper_harmonic_kokkos.h @@ -63,26 +63,26 @@ class ImproperHarmonicKokkos : public ImproperHarmonic { class NeighborKokkos *neighborKK; typename AT::t_x_array_randomread x; - typename AT::t_f_array f; + typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f; typename AT::t_int_2d improperlist; - DAT::tdual_efloat_1d k_eatom; - DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType> k_eatom; + 
Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType> k_vatom; + Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom; + Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom; int nlocal,newton_bond; int eflag,vflag; - DAT::tdual_int_scalar k_warning_flag; - typename AT::t_int_scalar d_warning_flag; - HAT::t_int_scalar h_warning_flag; + Kokkos::DualView<int,DeviceType> k_warning_flag; + typename Kokkos::DualView<int,DeviceType>::t_dev d_warning_flag; + typename Kokkos::DualView<int,DeviceType>::t_host h_warning_flag; - DAT::tdual_ffloat_1d k_k; - DAT::tdual_ffloat_1d k_chi; + Kokkos::DualView<F_FLOAT*,DeviceType> k_k; + Kokkos::DualView<F_FLOAT*,DeviceType> k_chi; - DAT::t_ffloat_1d d_k; - DAT::t_ffloat_1d d_chi; + typename Kokkos::DualView<F_FLOAT*,DeviceType>::t_dev d_k; + typename Kokkos::DualView<F_FLOAT*,DeviceType>::t_dev d_chi; virtual void allocate(); }; diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index c198d97e1429f1a9c5bffee5789e946edaaaa1f9..770598ecd3f0cf885d43965507b2c8ab470fc436 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -274,3 +274,4 @@ int KokkosLMP::neigh_count(int m) return nneigh; } + diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index c219b6628b0b4aa56a8d1d00b402b73c019318b2..6d8e8226348d13988216b49664ec39e8d6c628e3 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -19,6 +19,11 @@ #include <impl/Kokkos_Timer.hpp> #include <Kokkos_Vectorization.hpp> +#if defined(KOKKOS_HAVE_CXX11) +#undef ISFINITE +#define ISFINITE(x) std::isfinite(x) +#endif + #define MAX_TYPES_STACKPARAMS 12 #define NeighClusterSize 8 @@ -740,7 +745,11 @@ template<class ViewType> void memset_kokkos (ViewType &view) { static MemsetZeroFunctor<typename ViewType::execution_space> f; f.ptr = view.ptr_on_device(); + #ifdef KOKKOS_USING_EXPERIMENTAL_VIEW + Kokkos::parallel_for(view.memory_span()/4, f); + 
#else Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f); + #endif ViewType::execution_space::fence(); } diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp index 51903a58f64e89991171908d7e30e566ff462bac..b1b98144a604b6f832a1222667ffcad0790711b4 100644 --- a/src/KOKKOS/modify_kokkos.cpp +++ b/src/KOKKOS/modify_kokkos.cpp @@ -583,3 +583,4 @@ int ModifyKokkos::min_reset_ref() } return itmpall; } + diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp index 3c243b9153cb1879e54d988d0c2e0d5adc7a80d9..767def70c2fd4569a95a690a3d4dc13397e59dc0 100755 --- a/src/KOKKOS/neigh_bond_kokkos.cpp +++ b/src/KOKKOS/neigh_bond_kokkos.cpp @@ -1290,7 +1290,10 @@ void NeighBondKokkos<DeviceType>::update_domain_variables() /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class NeighBondKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class NeighBondKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index 302b6def62398f9ff424dda19ec23c3adc2d5c8b..5fe796f84de9ad0c82d3edbe6e660c8e604c8df7 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -112,7 +112,10 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style) } } +namespace LAMMPS_NS { template class NeighListKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class NeighListKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 17cd93620105f17989c0d44d1894239db7c86d92..b3ed769d75e177d8c7c52bc1d72c5fd468b0963c 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -598,3 +598,4 @@ void NeighborKokkos::build_topology_kokkos() { // include to trigger instantiation of templated functions #include "neigh_full_kokkos.h" + diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp 
b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp index 51547cea9768053874235e3f9820d6acfbe3d516..4c431bb42761e46b49d3e62dc6a5b8d98046f918 100644 --- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp @@ -141,8 +141,6 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairBuckCoulCutKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -367,7 +365,10 @@ double PairBuckCoulCutKokkos<DeviceType>::init_one(int i, int j) return cutone; } +namespace LAMMPS_NS { template class PairBuckCoulCutKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairBuckCoulCutKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp index 57108ee6e097a0e318d030fd19ecf5cb11cb787e..a7e6deb43f3860e0f7399bd9f6a66c09edb05c16 100644 --- a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp @@ -153,8 +153,6 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -511,7 +509,10 @@ double PairBuckCoulLongKokkos<DeviceType>::init_one(int i, int j) } +namespace LAMMPS_NS { template class PairBuckCoulLongKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairBuckCoulLongKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp index 7d73160392a18383b1775edcc38a9fd8eb051b40..50d65b4b6deea9085bfb51d1af760aafb6fd9e20 100755 --- a/src/KOKKOS/pair_buck_kokkos.cpp +++ b/src/KOKKOS/pair_buck_kokkos.cpp @@ -120,7 +120,6 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in) copymode = 1; EV_FLOAT ev = pair_compute<PairBuckKokkos<DeviceType>,void 
>(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -280,7 +279,10 @@ double PairBuckKokkos<DeviceType>::init_one(int i, int j) return cutone; } +namespace LAMMPS_NS { template class PairBuckKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairBuckKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp index cfed188c5aa3fc7335feff2bf851e01bb3d08bb9..7b0fbad7e5e671c9789c8d479436c9457335c1b6 100644 --- a/src/KOKKOS/pair_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp @@ -114,8 +114,6 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) eng_coul += ev.ecoul; if (vflag_global) { virial[0] += ev.v[0]; @@ -259,7 +257,10 @@ double PairCoulCutKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairCoulCutKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairCoulCutKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp index ea28e325a5f117f9dbd52d072282783f984de6cb..c4b78b89107f839e3fa38f8feaf255e62c7fb03a 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp @@ -123,8 +123,6 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairCoulDebyeKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -307,7 +305,10 @@ double PairCoulDebyeKokkos<DeviceType>::init_one(int i, int j) return cutone; } +namespace LAMMPS_NS { template class PairCoulDebyeKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class 
PairCoulDebyeKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index f3f1dcad90e54c48dec5976c990fae9a57e734f8..503cdc280d948ff71945bc6f315458d9ec6d3df7 100755 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -172,7 +172,6 @@ void PairCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_coul += ev.ecoul; if (vflag_global) { @@ -426,7 +425,10 @@ int PairCoulDSFKokkos<DeviceType>::sbmask(const int& j) const { return j >> SBBITS & 3; } +namespace LAMMPS_NS { template class PairCoulDSFKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairCoulDSFKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_coul_long_kokkos.cpp b/src/KOKKOS/pair_coul_long_kokkos.cpp index 150ed99296ad2ce537b5c75e90aa90fdbf41f226..95b6734e940996acb0ea3f6875b24d790efdd1bc 100644 --- a/src/KOKKOS/pair_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_coul_long_kokkos.cpp @@ -145,8 +145,6 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -450,7 +448,10 @@ double PairCoulLongKokkos<DeviceType>::init_one(int i, int j) } +namespace LAMMPS_NS { template class PairCoulLongKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairCoulLongKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index d11611468f117cfe45d555112945325e47fa53ea..774580c9296645de2bb9592b8d986649db8829b8 100755 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -173,7 +173,6 @@ void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_coul += ev.ecoul; if (vflag_global) { @@ -428,7 +427,10 @@ int 
PairCoulWolfKokkos<DeviceType>::sbmask(const int& j) const { return j >> SBBITS & 3; } +namespace LAMMPS_NS { template class PairCoulWolfKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairCoulWolfKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 706cf0b52375f7a839fbeff0944e9ef06fa99f1f..88b16d1d19d47d14ff0d302f9fc759a4819914af 100755 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -133,7 +133,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nall),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nlocal),*this); - DeviceType::fence(); // loop over neighbors of my atoms @@ -156,7 +155,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,0> >(0,inum),*this); } } - DeviceType::fence(); // communicate and sum densities (on the host) @@ -174,7 +172,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<0> >(0,inum),*this); - DeviceType::fence(); } else if (neighflag == FULL) { @@ -184,7 +181,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<0> >(0,inum),*this); - DeviceType::fence(); } if (eflag) { @@ -239,7 +235,6 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if 
(vflag_global) { @@ -1171,7 +1166,10 @@ void PairEAMAlloyKokkos<DeviceType>::file2array_alloy() /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class PairEAMAlloyKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairEAMAlloyKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 6d96d3c33c3f65ef21f17f5dfc99f4b3565c8d9b..83b65e8fdc2281174c43b1a6c2da03c5b7e6c420 100755 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -1180,7 +1180,10 @@ void PairEAMFSKokkos<DeviceType>::file2array_fs() /* ---------------------------------------------------------------------- */ +namespace LAMMPS_NS { template class PairEAMFSKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairEAMFSKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index b643393f84492bf5db669c5bf6e4fe5fb5729186..37a26145e0773b06a51008a8e8edcd9b1427fde8 100755 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -128,7 +128,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMInitialize>(0,nall),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMInitialize>(0,nlocal),*this); - DeviceType::fence(); // loop over neighbors of my atoms @@ -151,7 +150,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelA<HALFTHREAD,0> >(0,inum),*this); } } - DeviceType::fence(); // communicate and sum densities (on the host) @@ -169,7 +167,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelB<1> >(0,inum),*this,ev); 
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelB<0> >(0,inum),*this); - DeviceType::fence(); } else if (neighflag == FULL) { @@ -179,7 +176,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelAB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMKernelAB<0> >(0,inum),*this); - DeviceType::fence(); } if (eflag) { @@ -234,7 +230,6 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -342,60 +337,6 @@ void PairEAMKokkos<DeviceType>::file2array() template<class DeviceType> void PairEAMKokkos<DeviceType>::array2spline() { - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rdr = 1.0/dr; rdrho = 1.0/drho; @@ -638,7 +579,6 @@ template<class DeviceType> template<int EFLAG> KOKKOS_INLINE_FUNCTION void PairEAMKokkos<DeviceType>::operator()(TagPairEAMKernelB<EFLAG>, const int &ii, EV_FLOAT& ev) const { - // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom // if rho > rhomax (e.g. 
due to close approach of two atoms), @@ -949,7 +889,10 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int & } } +namespace LAMMPS_NS { template class PairEAMKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairEAMKokkos<LMPHostType>; -#endif \ No newline at end of file +#endif +} + diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp index 4e97e3ebd219023867cdc801e53672ce8bfddcb9..d438e64e7d923cbb866d611f6e44f8054784ee12 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp @@ -154,8 +154,6 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -510,7 +508,10 @@ double PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCharmmCoulCharmmImplicitKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCharmmCoulCharmmImplicitKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp index a31377f40a14f308200ac3eda0455f3bae6dbe77..4e125235f465f7a05a4c368e8299c70a545204ca 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp @@ -154,8 +154,6 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -511,7 +509,9 @@ double PairLJCharmmCoulCharmmKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCharmmCoulCharmmKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class 
PairLJCharmmCoulCharmmKokkos<LMPHostType>; #endif +} diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index 224e7619522ead92e655b8913e82f710b778bb40..c749b85f3c83b2420269bb4d90d804d5b0672337 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -154,8 +154,6 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -518,7 +516,10 @@ double PairLJCharmmCoulLongKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCharmmCoulLongKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCharmmCoulLongKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp index 9a451a744e6d60b76e14eb0362b81788bca1229e..87cd1cb7e141a50ba52d6036f6626e25eaa0ab43 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp @@ -125,8 +125,6 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairLJClass2CoulCutKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -347,8 +345,10 @@ double PairLJClass2CoulCutKokkos<DeviceType>::init_one(int i, int j) } - +namespace LAMMPS_NS { template class PairLJClass2CoulCutKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJClass2CoulCutKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp index a9cb4fbbe111606cb8cd73f09cff7317e26f4bb6..297a764ddabd4004ac95c3e829ece6c78c602b45 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp +++ 
b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp @@ -139,8 +139,6 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -496,8 +494,10 @@ double PairLJClass2CoulLongKokkos<DeviceType>::init_one(int i, int j) } - +namespace LAMMPS_NS { template class PairLJClass2CoulLongKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJClass2CoulLongKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_class2_kokkos.cpp b/src/KOKKOS/pair_lj_class2_kokkos.cpp index 9bc3989fff9052b07f5b034698de52f8fe8695cf..a263e81e0e8a74f10f6bc36907f75c8d1527fba5 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_kokkos.cpp @@ -114,7 +114,6 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) // loop over neighbors of my atoms EV_FLOAT ev = pair_compute<PairLJClass2Kokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -274,8 +273,10 @@ double PairLJClass2Kokkos<DeviceType>::init_one(int i, int j) } - +namespace LAMMPS_NS { template class PairLJClass2Kokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJClass2Kokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp index c3b9439670b2c6a186b0015915c57f1e5e56ab91..b6071880cfe841d7a7204387cf9ca1fafa0ac953 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp @@ -123,8 +123,6 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -339,7 +337,10 @@ double 
PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCutCoulCutKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCutCoulCutKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp index 4e3680bf3055aba2f57d4c4ac11c88c622112996..1da18f0afeb41af1b9320f69bc5d05bea4333ebd 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp @@ -129,8 +129,6 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairLJCutCoulDebyeKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -369,7 +367,10 @@ double PairLJCutCoulDebyeKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCutCoulDebyeKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCutCoulDebyeKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp index 480f2ad180d22a3dab832434121236fc6b1dfba9..46cb0a96dc1a93f1d411079656dd9e3e32f59bdf 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp @@ -147,8 +147,6 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in) ev = pair_compute<PairLJCutCoulDSFKokkos<DeviceType>,void > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -360,7 +358,10 @@ double PairLJCutCoulDSFKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCutCoulDSFKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCutCoulDSFKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp 
b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp index 356fc4f2c5342b069a3bb9cf979d09923dbd77ab..00d1561bc3d3fa1b4fbeb7cb599d655ef0a4339b 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -137,8 +137,6 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -488,7 +486,10 @@ double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCutCoulLongKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCutCoulLongKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index 3fd73350edfd35f83aea27ccd8bb6beec21b6a7f..2ad7f2d0143e0873f441b0743e26a623f258e223 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -127,7 +127,6 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // loop over neighbors of my atoms EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -293,7 +292,10 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJCutKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJCutKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_expand_kokkos.cpp b/src/KOKKOS/pair_lj_expand_kokkos.cpp index 29473f14bcbc8e1b4083dba10f587c122baf686e..3e1d185d2fd3c16d3a606a35848b5ad6cd04612c 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.cpp +++ b/src/KOKKOS/pair_lj_expand_kokkos.cpp @@ -115,7 +115,6 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in) copymode = 1; EV_FLOAT ev = pair_compute<PairLJExpandKokkos<DeviceType>,void 
>(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -279,7 +278,10 @@ double PairLJExpandKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJExpandKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJExpandKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp index 7081dd5417e9a371a6c67769cec97fef789d071b..c764af303f5a91c1556deef816af9347dd4edbad 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp @@ -145,8 +145,6 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += ev.ecoul; @@ -495,7 +493,10 @@ double PairLJGromacsCoulGromacsKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJGromacsCoulGromacsKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJGromacsCoulGromacsKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp index d1699fe6b6474b6bb24af26f19e260d0f1473617..2f144599ac34a8df9b3465554bb7f1f25983977e 100644 --- a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp @@ -132,8 +132,6 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev = pair_compute<PairLJGromacsKokkos<DeviceType>,CoulLongTable<0> > (this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); - if (eflag) { eng_vdwl += ev.evdwl; eng_coul += 0.0; @@ -327,7 +325,10 @@ double PairLJGromacsKokkos<DeviceType>::init_one(int i, int j) return cutone; } +namespace LAMMPS_NS { template class PairLJGromacsKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class 
PairLJGromacsKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.cpp b/src/KOKKOS/pair_lj_sdk_kokkos.cpp index 6ff92035351c67058fd913463cedd20210d6adf7..74183dff0b8b332e435105e72daca6af5597c07b 100644 --- a/src/KOKKOS/pair_lj_sdk_kokkos.cpp +++ b/src/KOKKOS/pair_lj_sdk_kokkos.cpp @@ -113,7 +113,6 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // loop over neighbors of my atoms EV_FLOAT ev = pair_compute<PairLJSDKKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list); - DeviceType::fence(); if (eflag) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -212,7 +211,6 @@ void PairLJSDKKokkos<DeviceType>::allocate() d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJSDK::params",n+1,n+1); params = k_params.d_view; - printf("Allocating: %i\n",n); } /* ---------------------------------------------------------------------- @@ -308,7 +306,10 @@ double PairLJSDKKokkos<DeviceType>::init_one(int i, int j) +namespace LAMMPS_NS { template class PairLJSDKKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairLJSDKKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index 8fc188f8f41b02e3be005fc1e08cd36f5b65e411..9864066fc245c26fcf803f648fb43151a25d36eb 100755 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -128,28 +128,24 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, 
TagPairSWComputeHalf<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == FULL) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullA<FULL,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairSWComputeFullB<FULL,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; } @@ -902,7 +898,10 @@ void PairSWKokkos<DeviceType>::ev_tally3_atom(EV_FLOAT &ev, const int &i, } } +namespace LAMMPS_NS { template class PairSWKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairSWKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index a7e01649cc9d04436d710574583f2eba86147655..fec6512a331d474935529a0542cdaf0ee33a68e2 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -192,7 +192,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) else Kokkos::parallel_for(config,f); } } - DeviceType::fence(); if (eflag) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -1375,8 +1374,11 @@ void PairTableKokkos<DeviceType>::cleanup_copy() { h_table=NULL; d_table=NULL; } +namespace LAMMPS_NS { template class PairTableKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairTableKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 61119c1585013ff115bf1372f35d22824824ef5e..a87c8563045119e4f7178a01c795bd1d7e89b4ea 100755 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -211,28 +211,24 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in) 
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == FULL) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; } @@ -1196,7 +1192,10 @@ int PairTersoffKokkos<DeviceType>::sbmask(const int& j) const { return j >> SBBITS & 3; } +namespace LAMMPS_NS { template class PairTersoffKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairTersoffKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index a6846460bdc038a025383f9b958ae74975e7557e..9e18058a8f5f8d0212a72ea112056dc82e095139 100755 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -211,28 +211,24 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, 
TagPairTersoffMODComputeHalf<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeHalf<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == FULL) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullA<FULL,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffMODComputeFullB<FULL,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; } @@ -1202,7 +1198,10 @@ int PairTersoffMODKokkos<DeviceType>::sbmask(const int& j) const { return j >> SBBITS & 3; } +namespace LAMMPS_NS { template class PairTersoffMODKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairTersoffMODKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 1b1023d946d56fee3e2e0631989bc7803d41b5ba..5f6a95130d1b036a842bc1508213b3b361c4a852 100755 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -225,28 +225,24 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) 
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else if (neighflag == FULL) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; } @@ -1296,7 +1292,10 @@ int PairTersoffZBLKokkos<DeviceType>::sbmask(const int& j) const { return j >> SBBITS & 3; } +namespace LAMMPS_NS { template class PairTersoffZBLKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class PairTersoffZBLKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 0de027591265ba69417b835b09c6fb9f30a3bd89..68cc7e49c21651b227b8419feb5ced6f1a45433c 100755 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -164,7 +164,10 @@ void RegBlockKokkos<DeviceType>::rotate(double &x, double &y, double &z, double z = point[2] + c[2] + disp[2]; } +namespace LAMMPS_NS { template class RegBlockKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA template class RegBlockKokkos<LMPHostType>; #endif +} + diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 527e10add33956e294b0da3d4befe88f8733f553..d063294023e1830ee0f54a5b63d470b027a3e714 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -39,6 +39,19 @@ using namespace LAMMPS_NS; +template<class ViewA, 
class ViewB> +struct ForceAdder { + ViewA a; + ViewB b; + ForceAdder(const ViewA& a_, const ViewB& b_):a(a_),b(b_) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + a(i,0) += b(i,0); + a(i,1) += b(i,1); + a(i,2) += b(i,2); + } +}; + /* ---------------------------------------------------------------------- */ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) : @@ -278,6 +291,8 @@ void VerletKokkos::run(int n) if (atomKK->sortfreq > 0) sortflag = 1; else sortflag = 0; + f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0()); + static double time = 0.0; static int count = 0; atomKK->sync(Device,ALL_MASK); @@ -359,54 +374,141 @@ void VerletKokkos::run(int n) timer->stamp(); - // added for debug - //atomKK->k_x.sync<LMPHostType>(); - //atomKK->k_f.sync<LMPHostType>(); - //atomKK->k_f.modify<LMPHostType>(); if (n_pre_force) { modify->pre_force(vflag); timer->stamp(Timer::MODIFY); } + bool execute_on_host = false; + unsigned int datamask_read_device = 0; + unsigned int datamask_modify_device = 0; + unsigned int datamask_read_host = 0; + unsigned int datamask_modify_host = 0; + + if ( pair_compute_flag ) { + if (force->pair->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= force->pair->datamask_read; + datamask_modify_device |= force->pair->datamask_modify; + } else { + datamask_read_device |= force->pair->datamask_read; + datamask_modify_device |= force->pair->datamask_modify; + } + } + if ( atomKK->molecular && force->bond ) { + if (force->bond->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= force->bond->datamask_read; + datamask_modify_device |= force->bond->datamask_modify; + } else { + datamask_read_device |= force->bond->datamask_read; + datamask_modify_device |= force->bond->datamask_modify; + } + } + if ( atomKK->molecular && force->angle ) { + if (force->angle->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= 
force->angle->datamask_read; + datamask_modify_device |= force->angle->datamask_modify; + } else { + datamask_read_device |= force->angle->datamask_read; + datamask_modify_device |= force->angle->datamask_modify; + } + } + if ( atomKK->molecular && force->dihedral ) { + if (force->dihedral->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= force->dihedral->datamask_read; + datamask_modify_device |= force->dihedral->datamask_modify; + } else { + datamask_read_device |= force->dihedral->datamask_read; + datamask_modify_device |= force->dihedral->datamask_modify; + } + } + if ( atomKK->molecular && force->improper ) { + if (force->improper->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= force->improper->datamask_read; + datamask_modify_device |= force->improper->datamask_modify; + } else { + datamask_read_device |= force->improper->datamask_read; + datamask_modify_device |= force->improper->datamask_modify; + } + } + if ( kspace_compute_flag ) { + if (force->kspace->execution_space==Host) { + execute_on_host = true; + datamask_read_host |= force->kspace->datamask_read; + datamask_modify_device |= force->kspace->datamask_modify; + } else { + datamask_read_device |= force->kspace->datamask_read; + datamask_modify_device |= force->kspace->datamask_modify; + } + } + if (pair_compute_flag) { atomKK->sync(force->pair->execution_space,force->pair->datamask_read); atomKK->modified(force->pair->execution_space,force->pair->datamask_modify); + atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + atomKK->modified(force->pair->execution_space,~(~force->pair->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + Kokkos::Impl::Timer ktimer; force->pair->compute(eflag,vflag); timer->stamp(Timer::PAIR); } + if(execute_on_host) { + if(pair_compute_flag && force->pair->datamask_modify!=(F_MASK | ENERGY_MASK | VIRIAL_MASK)) + Kokkos::fence(); + 
atomKK->sync_overlapping_device(Host,~(~datamask_read_host|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + if(pair_compute_flag && force->pair->execution_space!=Host) { + Kokkos::deep_copy(LMPHostType(),atomKK->k_f.h_view,0.0); + } + } + if (atomKK->molecular) { if (force->bond) { - atomKK->sync(force->bond->execution_space,force->bond->datamask_read); - atomKK->modified(force->bond->execution_space,force->bond->datamask_modify); + atomKK->sync(force->bond->execution_space,~(~force->bond->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + atomKK->modified(force->bond->execution_space,~(~force->bond->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); force->bond->compute(eflag,vflag); } if (force->angle) { - atomKK->sync(force->angle->execution_space,force->angle->datamask_read); - atomKK->modified(force->angle->execution_space,force->angle->datamask_modify); + atomKK->sync(force->angle->execution_space,~(~force->angle->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + atomKK->modified(force->angle->execution_space,~(~force->angle->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); force->angle->compute(eflag,vflag); } if (force->dihedral) { - atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read); - atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify); + atomKK->sync(force->dihedral->execution_space,~(~force->dihedral->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + atomKK->modified(force->dihedral->execution_space,~(~force->dihedral->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); force->dihedral->compute(eflag,vflag); } if (force->improper) { - atomKK->sync(force->improper->execution_space,force->improper->datamask_read); - atomKK->modified(force->improper->execution_space,force->improper->datamask_modify); + atomKK->sync(force->improper->execution_space,~(~force->improper->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + 
atomKK->modified(force->improper->execution_space,~(~force->improper->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); force->improper->compute(eflag,vflag); } timer->stamp(Timer::BOND); } if (kspace_compute_flag) { - atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read); - atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify); + atomKK->sync(force->kspace->execution_space,~(~force->kspace->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); + atomKK->modified(force->kspace->execution_space,~(~force->kspace->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK))); force->kspace->compute(eflag,vflag); timer->stamp(Timer::KSPACE); } + if(execute_on_host && !std::is_same<LMPHostType,LMPDeviceType>::value) { + if(f_merge_copy.dimension_0()<atomKK->k_f.dimension_0()) { + f_merge_copy = DAT::t_f_array("VerletKokkos::f_merge_copy",atomKK->k_f.dimension_0()); + } + f = atomKK->k_f.d_view; + Kokkos::deep_copy(LMPHostType(),f_merge_copy,atomKK->k_f.h_view); + Kokkos::parallel_for(atomKK->k_f.dimension_0(), + ForceAdder<DAT::t_f_array,DAT::t_f_array>(atomKK->k_f.d_view,f_merge_copy)); + atomKK->k_f.template modify<LMPDeviceType>(); + } + + // reverse communication of forces if (force->newton) comm->reverse_comm(); @@ -414,15 +516,11 @@ void VerletKokkos::run(int n) // force modifications, final time integration, diagnostics - ktimer.reset(); - if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); timer->stamp(Timer::MODIFY); - time += ktimer.seconds(); - // all output if (ntimestep == output->next) { @@ -506,3 +604,5 @@ void VerletKokkos::force_clear() } } } + + diff --git a/src/KOKKOS/verlet_kokkos.h b/src/KOKKOS/verlet_kokkos.h index 4b64ced5676062904f8eebf675f31c71f1346af5..03a93833245dfcf41d36f069eb4b4839b5c156df 100644 --- a/src/KOKKOS/verlet_kokkos.h +++ b/src/KOKKOS/verlet_kokkos.h @@ -21,6 +21,7 @@ IntegrateStyle(verlet/kk,VerletKokkos) #define 
LMP_VERLET_KOKKOS_H #include "verlet.h" +#include "kokkos_type.h" namespace LAMMPS_NS { @@ -32,8 +33,16 @@ class VerletKokkos : public Verlet { void setup_minimal(int); void run(int); - protected: + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + f(i,0) += f_merge_copy(i,0); + f(i,1) += f_merge_copy(i,1); + f(i,2) += f_merge_copy(i,2); + } + + protected: + DAT::t_f_array f_merge_copy,f; void force_clear(); }; diff --git a/src/lammps.cpp b/src/lammps.cpp index 94a267db8a35504245ed73927595c5cec5f13246..56829e1246b926b489b082470f1b5f540ee64cef 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -650,7 +650,11 @@ void LAMMPS::create() if (kokkos) atom = new AtomKokkos(this); else atom = new Atom(this); - atom->create_avec("atomic",0,NULL,1); + + if (kokkos) + atom->create_avec("atomic/kk",0,NULL,1); + else + atom->create_avec("atomic",0,NULL,1); group = new Group(this); force = new Force(this); // must be after group, to create temperature