diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index c79559172fcf332e9ed30ff57ec2835dacddc1c1..58fc9c46c355f49816bb6aa77583593937fbaa0c 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -1801,6 +1801,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>(); if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>(); if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync<LMPDeviceType>(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync<LMPDeviceType>(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync<LMPDeviceType>(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync<LMPDeviceType>(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync<LMPDeviceType>(); + if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPDeviceType>(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPDeviceType>(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPDeviceType>(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPDeviceType>(); } else { if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>(); if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>(); @@ -1809,6 +1818,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>(); if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>(); if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync<LMPHostType>(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync<LMPHostType>(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync<LMPHostType>(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync<LMPHostType>(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync<LMPHostType>(); + if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPHostType>(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPHostType>(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPHostType>(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPHostType>(); } } @@ -1831,6 +1849,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_rho,space); + if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_dpdTheta,space); + if ((mask & UCOND_MASK) && atomKK->k_uCond.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCond,space); + if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uMech,space); + if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uChem,space); + if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCG,space); + if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space); + if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space); } else { if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); @@ -1846,6 +1882,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_rho,space); + if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_dpdTheta,space); + if ((mask & UCOND_MASK) && atomKK->k_uCond.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCond,space); + if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uMech,space); + if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uChem,space); + if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCG,space); + if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space); + if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space); } } @@ -1861,6 +1915,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>(); if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>(); if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>(); + if (mask & DPDRHO_MASK) atomKK->k_rho.modify<LMPDeviceType>(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify<LMPDeviceType>(); + if (mask & UCOND_MASK) atomKK->k_uCond.modify<LMPDeviceType>(); + if (mask & UMECH_MASK) atomKK->k_uMech.modify<LMPDeviceType>(); + if (mask & UCHEM_MASK) atomKK->k_uChem.modify<LMPDeviceType>(); + if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPDeviceType>(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPDeviceType>(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPDeviceType>(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPDeviceType>(); } else { if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>(); if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>(); @@ -1869,6 +1932,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>(); if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>(); if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>(); + if (mask & DPDRHO_MASK) atomKK->k_rho.modify<LMPHostType>(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify<LMPHostType>(); + if (mask & UCOND_MASK) atomKK->k_uCond.modify<LMPHostType>(); + if (mask & UMECH_MASK) atomKK->k_uMech.modify<LMPHostType>(); + if (mask & UCHEM_MASK) atomKK->k_uChem.modify<LMPHostType>(); + if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPHostType>(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPHostType>(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPHostType>(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPHostType>(); } } diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index faf490fcc03fef024a4ba1f6b648b0be7cd147e3..75e9b292f9643bdba105e010cb581750eca30775 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -52,7 +52,7 @@ FixEOStableRXKokkos<DeviceType>::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char template<class DeviceType> FixEOStableRXKokkos<DeviceType>::~FixEOStableRXKokkos() { - + if (copymode) return; } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 45da5bf165be58d4f1976ae61f89d878778c7dff..0bfbb9491e07405cc943a8398af3deb18f8ebd8d 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -52,6 +52,8 @@ PairDPDfdtEnergyKokkos<DeviceType>::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDP { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | TAG_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; cutsq = NULL; } @@ -357,6 +359,7 @@ double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j) m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; } k_cutsq.h_view(i,j) = cutone*cutone; + k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j); k_cutsq.template modify<LMPHostType>(); return cutone; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index c46f3d037d636aaa9b8088b2f399cd8d8100c30f..7e74f39ef09926289f9d96a6aba855dcd08d1dd1 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -54,8 +54,8 @@ PairExp6rxKokkos<DeviceType>::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice<DeviceType>::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } @@ -104,6 +104,8 @@ void PairExp6rxKokkos<DeviceType>::init_style() template<class DeviceType> void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in) { + copymode = 1; + eflag = eflag_in; vflag = vflag_in; @@ -141,7 +143,9 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in) special_coul[3] = force->special_coul[3]; newton_pair = force->newton_pair; - copymode = 1; + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); // Initialize the Exp6 parameter data for both the local // and ghost atoms. Make the parameter data persistent @@ -185,10 +189,22 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev; - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,1> >(0,inum),*this,ev); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,0> >(0,inum),*this); + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,1> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,0> >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,0,1> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,0,0> >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,1,1> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,1,0> >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,0,1> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,0,0> >(0,inum),*this); + } } k_error_flag.template modify<DeviceType>(); @@ -246,6 +262,12 @@ template<class DeviceType> template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const { + + // These arrays are atomic for Half/Thread neighbor style + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f; + Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_uCG = uCG; + Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_uCGnew = uCGnew; + int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq,r2inv,r6inv,forceExp6,factor_lj; @@ -287,6 +309,12 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW itype = type[i]; jnum = d_numneigh[i]; + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + double uCG_i = 0.0; + double uCGnew_i = 0.0; + { epsilon1_i = PairExp6ParamData.epsilon1[i]; alpha1_i = PairExp6ParamData.alpha1[i]; @@ -457,9 +485,9 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW evdwlOld *= factor_lj; - uCG[i] += 0.5*evdwlOld; + uCG_i += 0.5*evdwlOld; if (newton_pair || j < nlocal) - uCG[j] += 0.5*evdwlOld; + a_uCG[j] += 0.5*evdwlOld; } if(rm12_ij!=0.0 && rm21_ij!=0.0){ @@ -537,28 +565,34 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12; else fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*fpairOldEXP6_21; - f(i,0) += delx*fpair; - f(i,1) += dely*fpair; - f(i,2) += delz*fpair; + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; if (newton_pair || j < nlocal) { - f(j,0) -= delx*fpair; - f(j,1) -= dely*fpair; - f(j,2) -= delz*fpair; + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; } if (isite1 == isite2) evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12; else evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21; evdwl *= factor_lj; - uCGnew[i] += 0.5*evdwl; + uCGnew_i += 0.5*evdwl; if (newton_pair || j < nlocal) - uCGnew[j] += 0.5*evdwl; + a_uCGnew[j] += 0.5*evdwl; evdwl = evdwlOld; //if (vflag_either || eflag_atom) if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz); } } } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_uCG[i] += uCG_i; + a_uCGnew[i] += uCGnew_i; } template<class DeviceType> diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index bea7cb6b0bfbfa855e9d9faa5910b143bb1f7898..03bbaf9907d72b4ccc6e7c84318127617f1e2abe 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -68,8 +68,14 @@ PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice<DeviceType>::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + update_table = 0; + ntables = 0; + tables = NULL; + h_table = new TableHost(); + d_table = new TableDevice(); k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } @@ -79,7 +85,10 @@ PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult template<class DeviceType> PairMultiLucyRXKokkos<DeviceType>::~PairMultiLucyRXKokkos() { + if (copymode) return; + delete h_table; + delete d_table; } /* ---------------------------------------------------------------------- */ @@ -109,7 +118,7 @@ void PairMultiLucyRXKokkos<DeviceType>::init_style() neighbor->requests[irequest]->half = 1; neighbor->requests[irequest]->ghost = 1; } else { - error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk"); } } @@ -118,6 +127,23 @@ void PairMultiLucyRXKokkos<DeviceType>::init_style() template<class DeviceType> void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in) { + if (update_table) + create_kokkos_tables(); + + if (tabstyle == LOOKUP) + compute_style<LOOKUP>(eflag_in,vflag_in); + else if(tabstyle == LINEAR) + compute_style<LINEAR>(eflag_in,vflag_in); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +template<int TABSTYLE> +void PairMultiLucyRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) +{ + copymode = 1; + eflag = eflag_in; vflag = vflag_in; @@ -145,10 +171,14 @@ void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in) x = atomKK->k_x.view<DeviceType>(); f = atomKK->k_f.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); + rho = atomKK->k_rho.view<DeviceType>(); uCG = atomKK->k_uCG.view<DeviceType>(); uCGnew = atomKK->k_uCGnew.view<DeviceType>(); dvector = atomKK->k_dvector.view<DeviceType>(); - rho = atomKK->k_rho.view<DeviceType>(); + + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); nlocal = atom->nlocal; int nghost = atom->nghost; @@ -176,10 +206,22 @@ void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in) EV_FLOAT ev; - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1> >(0,inum),*this,ev); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0> >(0,inum),*this); + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1,TABSTYLE> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0,TABSTYLE> >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,1,TABSTYLE> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,0,TABSTYLE> >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,1,TABSTYLE> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,0,TABSTYLE> >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,1,TABSTYLE> >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,0,TABSTYLE> >(0,inum),*this); + } } k_error_flag.template modify<DeviceType>(); @@ -223,9 +265,13 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXgetParams, } template<class DeviceType> -template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE> KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const { +void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor style + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f; + int i,j,jj,inum,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq; @@ -239,8 +285,6 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI double fraction_i,fraction_j; int jtable; - Table *tb; - int tlm1 = tablength - 1; i = d_ilist[ii]; @@ -274,26 +318,34 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI fractionOld1_j = d_fractionOld1[j]; fractionOld2_j = d_fractionOld2[j]; - tb = &tables[tabindex[itype][jtype]]; - if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){ + //tb = &tables[tabindex[itype][jtype]]; + const int tidx = d_table_const.tabindex(itype,jtype); + //if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){ + if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){ k_error_flag.d_view() = 1; } - if (tabstyle == LOOKUP) { - itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); - jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + if (TABSTYLE == LOOKUP) { + //itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + jtable = static_cast<int> (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ k_error_flag.d_view() = 2; } - A_i = tb->f[itable]; - A_j = tb->f[jtable]; + //A_i = tb->f[itable]; + A_i = d_table_const.f(tidx,itable); + //A_j = tb->f[jtable]; + A_j = d_table_const.f(tidx,jtable); const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; fpair /= sqrt(rsq); - } else if (tabstyle == LINEAR) { - itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); - jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + } else if (TABSTYLE == LINEAR) { + //itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); + itable = static_cast<int> ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + jtable = static_cast<int> ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ k_error_flag.d_view() = 2; } @@ -302,15 +354,19 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI if(jtable<0) jtable=0; if(jtable>=tlm1)jtable=tlm1; - fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); - fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta); + //fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); + fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); + //fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta); + fraction_j = (((rho[j]*rho[j]) - d_table_const.rsq(tidx,jtable)) * d_table_const.invdelta(tidx)); if(itable==0) fraction_i=0.0; if(itable==tlm1) fraction_i=0.0; if(jtable==0) fraction_j=0.0; if(jtable==tlm1) fraction_j=0.0; - A_i = tb->f[itable] + fraction_i*tb->df[itable]; - A_j = tb->f[jtable] + fraction_j*tb->df[jtable]; + //A_i = tb->f[itable] + fraction_i*tb->df[itable]; + A_i = d_table_const.f(tidx,itable) + fraction_i*d_table_const.df(tidx,itable); + //A_j = tb->f[jtable] + fraction_j*tb->df[jtable]; + A_j = d_table_const.f(tidx,jtable) + fraction_j*d_table_const.df(tidx,jtable); const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; @@ -325,29 +381,34 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI fy_i += dely*fpair; fz_i += delz*fpair; if (NEWTON_PAIR || j < nlocal) { - f(j,0) -= delx*fpair; - f(j,1) -= dely*fpair; - f(j,2) -= delz*fpair; + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; } //if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz); if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,0.0,fpair,delx,dely,delz); } } - f(i,0) += fx_i; - f(i,1) += fy_i; - f(i,2) += fz_i; - - tb = &tables[tabindex[itype][itype]]; - itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); - if (tabstyle == LOOKUP) evdwl = tb->e[itable]; - else if (tabstyle == LINEAR){ + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + + //tb = &tables[tabindex[itype][itype]]; + const int tidx = d_table_const.tabindex(itype,itype); + //itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (TABSTYLE == LOOKUP) evdwl = tb->e[itable]; + if (TABSTYLE == LOOKUP) evdwl = d_table_const.e(tidx,itable); + else if (TABSTYLE == LINEAR){ if (itable >= tlm1){ k_error_flag.d_view() = 2; } if(itable==0) fraction_i=0.0; - else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); - evdwl = tb->e[itable] + fraction_i*tb->de[itable]; + //else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); + else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); + //evdwl = tb->e[itable] + fraction_i*tb->de[itable]; + evdwl = d_table_const.e(tidx,itable); + fraction_i*d_table_const.de(tidx,itable); } else k_error_flag.d_view() = 3; evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; @@ -364,121 +425,11 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI } template<class DeviceType> -template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE> KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const { +void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii) const { EV_FLOAT ev; - this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev); -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -template<class DeviceType> -void PairMultiLucyRXKokkos<DeviceType>::coeff(int narg, char **arg) -{ - if (narg != 6 && narg != 7) error->all(FLERR,"Illegal pair_coeff command"); - - bool rx_flag = false; - for (int i = 0; i < modify->nfix; i++) - if (strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true; - if (!rx_flag) error->all(FLERR,"PairMultiLucyRXKokkos<DeviceType> requires a fix rx command."); - - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; - force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); - force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); - - int me; - MPI_Comm_rank(world,&me); - tables = (Table *) - memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); - Table *tb = &tables[ntables]; - null_table(tb); - if (me == 0) read_table(tb,arg[2],arg[3]); - bcast_table(tb); - - nspecies = atom->nspecies_dpd; - int n; - n = strlen(arg[3]) + 1; - site1 = new char[n]; - strcpy(site1,arg[4]); - - n = strlen(arg[4]) + 1; - site2 = new char[n]; - strcpy(site2,arg[5]); - - // set table cutoff - - if (narg == 7) tb->cut = force->numeric(FLERR,arg[6]); - else if (tb->rflag) tb->cut = tb->rhi; - else tb->cut = tb->rfile[tb->ninput-1]; - - // error check on table parameters - // insure cutoff is within table - - if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); - if (tb->rflag == 0) { - rho_0 = tb->rfile[0]; - } else { - rho_0 = tb->rlo; - } - - tb->match = 0; - if (tabstyle == LINEAR && tb->ninput == tablength && - tb->rflag == RSQ) tb->match = 1; - - // spline read-in values and compute r,e,f vectors within table - - if (tb->match == 0) spline_table(tb); - compute_table(tb); - - // store ptr to table in tabindex - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j = MAX(jlo,i); j <= jhi; j++) { - tabindex[i][j] = ntables; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); - ntables++; - - // Match site* to isite values. - - if (strcmp(site1, "1fluid") == 0) - isite1 = oneFluidParameter; - else { - isite1 = nspecies; - for (int ispecies = 0; ispecies < nspecies; ++ispecies) - if (strcmp(site1, atom->dname[ispecies]) == 0){ - isite1 = ispecies; - break; - } - - if (isite1 == nspecies) - error->all(FLERR,"Pair_multi_lucy_rx site1 is invalid."); - } - - if (strcmp(site2, "1fluid") == 0) - isite2 = oneFluidParameter; - else { - isite2 = nspecies; - for (int ispecies = 0; ispecies < nspecies; ++ispecies) - if (strcmp(site2, atom->dname[ispecies]) == 0){ - isite2 = ispecies; - break; - } - - if (isite2 == nspecies) - error->all(FLERR,"Pair_multi_lucy_rx site2 is invalid."); - } - + this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(), ii, ev); } /* ---------------------------------------------------------------------- */ @@ -486,12 +437,16 @@ void PairMultiLucyRXKokkos<DeviceType>::coeff(int narg, char **arg) template<class DeviceType> void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity() { + copymode = 1; + x = atomKK->k_x.view<DeviceType>(); type = atomKK->k_type.view<DeviceType>(); rho = atomKK->k_rho.view<DeviceType>(); + h_rho = atomKK->k_rho.h_view; nlocal = atom->nlocal; - //sync + atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK); + atomKK->modified(execution_space,DPDRHO_MASK); const int inum = list->inum; NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list); @@ -514,16 +469,34 @@ void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity() if (newton_pair) m += atom->nghost; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXZero>(0,m),*this); -// rho = density at each atom -// loop over neighbors of my atoms - if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1> >(0,inum),*this); - else - Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0> >(0,inum),*this); + // rho = density at each atom + // loop over neighbors of my atoms + + if (neighflag == HALF) { + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1> >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0> >(0,inum),*this); + } else if (neighflag == HALFTHREAD) { + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1> >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0> >(0,inum),*this); + } + + // communicate and sum densities (on the host) - if (newton_pair) comm->reverse_comm_pair(this); + if (newton_pair) { + atomKK->modified(execution_space,DPDRHO_MASK); + atomKK->sync(Host,DPDRHO_MASK); + comm->reverse_comm_pair(this); + atomKK->modified(Host,DPDRHO_MASK); + atomKK->sync(execution_space,DPDRHO_MASK); + } comm->forward_comm_pair(this); + + copymode = 0; } template<class DeviceType> @@ -536,6 +509,10 @@ template<class DeviceType> template<int NEIGHFLAG, int NEWTON_PAIR> KOKKOS_INLINE_FUNCTION void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR>, const int &ii) const { + + // The rho array is atomic for Half/Thread neighbor style + Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_rho = rho; + const int i = d_ilist[ii]; const double xtmp = x(i,0); @@ -567,7 +544,7 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLoca const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4; rho_i += factor; if (NEWTON_PAIR || j < nlocal) - rho[j] += factor; + a_rho[j] += factor; } else if (rsq < d_cutsq(itype,jtype)) { const double rcut = sqrt(d_cutsq(itype,jtype)); const double tmpFactor = 1.0-sqrt(rsq)/rcut; @@ -575,12 +552,12 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLoca const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; rho_i += factor; if (NEWTON_PAIR || j < nlocal) - rho[j] += factor; + a_rho[j] += factor; } } } - rho[i] = rho_i; + a_rho[i] = rho_i; } /* ---------------------------------------------------------------------- */ @@ -630,16 +607,53 @@ void PairMultiLucyRXKokkos<DeviceType>::getParams(int id, double &fractionOld1, /* ---------------------------------------------------------------------- */ +template<class DeviceType> +int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, + int pbc_flag, int *pbc) +{ + d_sendlist = k_sendlist.view<DeviceType>(); + iswap = iswap_in; + v_buf = buf.view<DeviceType>(); + Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXPackForwardComm>(0,n),*this); + DeviceType::fence(); + return n; +} + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const { + int j = d_sendlist(iswap, i); + v_buf[i] = rho[j]; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = buf.view<DeviceType>(); + Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXUnpackForwardComm>(0,n),*this); + DeviceType::fence(); +} + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXUnpackForwardComm, const int &i) const { + rho[i + first] = v_buf[i]; +} + +/* ---------------------------------------------------------------------- */ + template<class DeviceType> int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int i,j,m; - rho = atomKK->k_rho.view<DeviceType>(); m = 0; for (i = 0; i < n; i++) { j = list[i]; - buf[m++] = rho[j]; + buf[m++] = h_rho[j]; } return m; } @@ -650,11 +664,10 @@ template<class DeviceType> void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf) { int i,m,last; - rho = atomKK->k_rho.view<DeviceType>(); m = 0; last = first + n; - for (i = first; i < last; i++) rho[i] = buf[m++]; + for (i = first; i < last; i++) h_rho[i] = buf[m++]; } /* ---------------------------------------------------------------------- */ @@ -663,11 +676,10 @@ template<class DeviceType> int PairMultiLucyRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf) { int i,m,last; - rho = atomKK->k_rho.view<DeviceType>(); m = 0; last = first + n; - for (i = first; i < last; i++) buf[m++] = rho[i]; + for (i = first; i < last; i++) buf[m++] = h_rho[i]; return m; } @@ -677,12 +689,11 @@ template<class DeviceType> void PairMultiLucyRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,m; - rho = atomKK->k_rho.view<DeviceType>(); m = 0; for (i = 0; i < n; i++) { j = list[i]; - rho[j] += buf[m++]; + h_rho[j] += buf[m++]; } } @@ -782,6 +793,145 @@ void PairMultiLucyRXKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, con /* ---------------------------------------------------------------------- */ +template<class DeviceType> +void PairMultiLucyRXKokkos<DeviceType>::create_kokkos_tables() +{ + const int tlm1 = tablength-1; + + memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); + + if(tabstyle == LOOKUP) { + memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + } + + if(tabstyle == LINEAR) { + memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + } + + for(int i=0; i < ntables; i++) { + Table* tb = &tables[i]; + + h_table->innersq[i] = tb->innersq; + h_table->invdelta[i] = tb->invdelta; + h_table->deltasq6[i] = tb->deltasq6; + + for(int j = 0; j<h_table->rsq.dimension_1(); j++) + h_table->rsq(i,j) = tb->rsq[j]; + for(int j = 0; j<h_table->drsq.dimension_1(); j++) + h_table->drsq(i,j) = tb->drsq[j]; + for(int j = 0; j<h_table->e.dimension_1(); j++) + h_table->e(i,j) = tb->e[j]; + for(int j = 0; j<h_table->de.dimension_1(); j++) + h_table->de(i,j) = tb->de[j]; + for(int j = 0; j<h_table->f.dimension_1(); j++) + h_table->f(i,j) = tb->f[j]; + for(int j = 0; j<h_table->df.dimension_1(); j++) + h_table->df(i,j) = tb->df[j]; + for(int j = 0; j<h_table->e2.dimension_1(); j++) + h_table->e2(i,j) = tb->e2[j]; + for(int j = 0; j<h_table->f2.dimension_1(); j++) + h_table->f2(i,j) = tb->f2[j]; + } + + + Kokkos::deep_copy(d_table->innersq,h_table->innersq); + Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); + Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); + Kokkos::deep_copy(d_table->rsq,h_table->rsq); + Kokkos::deep_copy(d_table->drsq,h_table->drsq); + Kokkos::deep_copy(d_table->e,h_table->e); + Kokkos::deep_copy(d_table->de,h_table->de); + Kokkos::deep_copy(d_table->f,h_table->f); + Kokkos::deep_copy(d_table->df,h_table->df); + Kokkos::deep_copy(d_table->e2,h_table->e2); + Kokkos::deep_copy(d_table->f2,h_table->f2); + Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); + + d_table_const.innersq = d_table->innersq; + d_table_const.invdelta = d_table->invdelta; + d_table_const.deltasq6 = d_table->deltasq6; + d_table_const.rsq = d_table->rsq; + d_table_const.drsq = d_table->drsq; + d_table_const.e = d_table->e; + d_table_const.de = d_table->de; + d_table_const.f = d_table->f; + d_table_const.df = d_table->df; + d_table_const.e2 = d_table->e2; + d_table_const.f2 = d_table->f2; + + + Kokkos::deep_copy(d_table->cutsq,h_table->cutsq); + update_table = 0; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairMultiLucyRXKokkos<DeviceType>::allocate() +{ + allocated = 1; + const int nt = atom->ntypes + 1; + + memory->create(setflag,nt,nt,"pair:setflag"); + memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); + memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + + d_table_const.cutsq = d_table->cutsq; + d_table_const.tabindex = d_table->tabindex; + memset(&setflag[0][0],0,nt*nt*sizeof(int)); + memset(&cutsq[0][0],0,nt*nt*sizeof(double)); + memset(&tabindex[0][0],0,nt*nt*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairMultiLucyRXKokkos<DeviceType>::settings(int narg, char **arg) +{ + if (narg < 2) error->all(FLERR,"Illegal pair_style command"); + + // new settings + + if (strcmp(arg[0],"lookup") == 0) tabstyle = LOOKUP; + else if (strcmp(arg[0],"linear") == 0) tabstyle = LINEAR; + else error->all(FLERR,"Unknown table style in pair_style command"); + + tablength = force->inumeric(FLERR,arg[1]); + if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries"); + + // delete old tables, since cannot just change settings + + for (int m = 0; m < ntables; m++) free_table(&tables[m]); + memory->sfree(tables); + + if (allocated) { + memory->destroy(setflag); + + d_table_const.tabindex = d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d(); + h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d(); + + d_table_const.cutsq = d_table->cutsq = typename ArrayTypes<DeviceType>::t_ffloat_2d(); + h_table->cutsq = typename ArrayTypes<LMPHostType>::t_ffloat_2d(); + } + allocated = 0; + + ntables = 0; + tables = NULL; +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class PairMultiLucyRXKokkos<LMPDeviceType>; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index ff22516eb18c65723cb073c092d6bcaea1f8ebe4..a259588f78fdeab8f751d6d182c99b1d01275bca 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -29,9 +29,12 @@ PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos<LMPHostType>) namespace LAMMPS_NS { +struct TagPairMultiLucyRXPackForwardComm{}; +struct TagPairMultiLucyRXUnpackForwardComm{}; + struct TagPairMultiLucyRXgetParams{}; -template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE> struct TagPairMultiLucyRXCompute{}; struct TagPairMultiLucyRXZero{}; @@ -50,24 +53,37 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { virtual ~PairMultiLucyRXKokkos(); void compute(int, int); + void settings(int, char **); + + template<int TABSTYLE> + void compute_style(int, int); + void init_style(); - void coeff(int, char **); + int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); void computeLocalDensity(); + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXPackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const; + KOKKOS_INLINE_FUNCTION void operator()(TagPairMultiLucyRXgetParams, const int&) const; - template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE> KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const; + void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&, EV_FLOAT&) const; - template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE> KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const; + void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairMultiLucyRXZero, const int&) const; @@ -92,6 +108,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { double rcut_type11; double factor_type11; + enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + //struct Table { // int ninput,rflag,fpflag,match; // double rlo,rhi,fplo,fphi,cut; @@ -100,14 +118,47 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { // double innersq,delta,invdelta,deltasq6; // double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; //}; - //Table *tables; - int **tabindex; + int tabstyle,tablength; + /*struct TableDeviceConst { + typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread cutsq; + typename ArrayTypes<DeviceType>::t_int_2d_randomread tabindex; + typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread innersq,invdelta,deltasq6; + typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + };*/ + //Its faster not to use texture fetch if the number of tables is less than 32! + struct TableDeviceConst { + typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq; + typename ArrayTypes<DeviceType>::t_int_2d tabindex; + typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableDevice { + typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq; + typename ArrayTypes<DeviceType>::t_int_2d tabindex; + typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableHost { + typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq; + typename ArrayTypes<LMPHostType>::t_int_2d tabindex; + typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + TableDeviceConst d_table_const; + TableDevice* d_table; + TableHost* h_table; - //void read_table(Table *, char *, char *); - //void param_extract(Table *, char *); + int **tabindex; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; - char *site1, *site2; + void allocate(); + int update_table; + void create_kokkos_tables(); + void cleanup_copy(); KOKKOS_INLINE_FUNCTION void getParams(int, double &, double &, double &, double &) const; @@ -118,6 +169,7 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { typename AT::t_f_array f; typename AT::t_int_1d_randomread type; typename AT::t_efloat_1d rho; + typename HAT::t_efloat_1d h_rho; typename AT::t_efloat_1d uCG, uCGnew; typename AT::t_float_2d dvector; @@ -135,6 +187,11 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { typename AT::tdual_ffloat_2d k_cutsq; typename AT::t_ffloat_2d d_cutsq; + int iswap; + int first; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + friend void pair_virial_fdotr_compute<PairMultiLucyRXKokkos>(PairMultiLucyRXKokkos*); }; diff --git a/src/USER-DPD/fix_eos_table_rx.cpp b/src/USER-DPD/fix_eos_table_rx.cpp index e10ce960899e313e0474420dc149b0135ffccc95..8871bdd176223d1bc72437eaf57eb44ff725eb03 100644 --- a/src/USER-DPD/fix_eos_table_rx.cpp +++ b/src/USER-DPD/fix_eos_table_rx.cpp @@ -127,6 +127,8 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) : FixEOStableRX::~FixEOStableRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) { free_table(&tables[m]); free_table(&tables2[m]); diff --git a/src/USER-DPD/pair_multi_lucy_rx.cpp b/src/USER-DPD/pair_multi_lucy_rx.cpp index cd107f1519e8f5918eb6ecc71ad50f4086c207d0..6b5c7cf40a6ab547a09b3bec73235d5f20dbd6f9 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.cpp +++ b/src/USER-DPD/pair_multi_lucy_rx.cpp @@ -78,6 +78,8 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp), PairMultiLucyRX::~PairMultiLucyRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index 902d0e5bb4397144599637f8c03c977b7784ba6c..463e1838c6ec9868c5d46dcadc9a1b6393097f25 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -50,6 +50,8 @@ PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp) PairTableRX::~PairTableRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); diff --git a/src/atom_masks.h b/src/atom_masks.h index 119f09f273bfe9bdaab723f9435d1ed4e8ee172f..8e29448488df89760010312a15fa8e9f36a40496 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -42,6 +42,18 @@ #define ENERGY_MASK 0x00010000 #define VIRIAL_MASK 0x00020000 +// DPD + +#define DPDRHO_MASK 0x00040000 +#define DPDTHETA_MASK 0x00080000 +#define UCOND_MASK 0x00100000 +#define UMECH_MASK 0x00200000 +#define UCHEM_MASK 0x00400000 +#define UCG_MASK 0x00800000 +#define UCGNEW_MASK 0x01000000 +#define DUCHEM_MASK 0x02000000 +#define DVECTOR_MASK 0x04000000 + // granular #define RADIUS_MASK 0x00100000