diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp index e6c9756b94d7cc3558c3f5df74dfb861e32b5383..cd8beed583a4f29e3990a5723bed0e17cb243bca 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -159,9 +159,17 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; + if (d_numneigh.extent(0) != d_numneigh_touch.extent(0)) + d_numneigh_touch = typename AT::t_int_1d("pair:numneigh_touch",d_numneigh.extent(0)); + if (d_neighbors.extent(0) != d_neighbors_touch.extent(0) || + d_neighbors.extent(1) != d_neighbors_touch.extent(1)) + d_neighbors_touch = typename AT::t_neighbors_2d("pair:neighbors_touch",d_neighbors.extent(0),d_neighbors.extent(1)); + d_firsttouch = fix_historyKK->d_firstflag; d_firstshear = fix_historyKK->d_firstvalue; - + + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryReduce>(0,inum),*this); + EV_FLOAT ev; if (lmp->kokkos->neighflag == HALF) { @@ -269,6 +277,44 @@ void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in) copymode = 0; } +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryReduce, const int ii) const { + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const LMP_FLOAT imass = rmass[i]; + const LMP_FLOAT irad = radius[i]; + const int jnum = d_numneigh[i]; + int count = 0; + + for (int jj = 0; jj < jnum; jj++) { + const int j = d_neighbors(i,jj) & NEIGHMASK; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const LMP_FLOAT jmass = rmass[j]; + const LMP_FLOAT jrad = radius[j]; + const LMP_FLOAT radsum = irad + jrad; + + // check for touching neighbors + + if (rsq >= radsum * radsum) { + d_firsttouch(i,jj) = 0; + d_firstshear(i,3*jj) = 0; + d_firstshear(i,3*jj+1) = 0; + d_firstshear(i,3*jj+2) = 0; + } else { + d_firsttouch(i,jj) = 1; + d_neighbors_touch(i,count++) = jj; + } + } + d_numneigh_touch[i] = count; +} + template<class DeviceType> template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> KOKKOS_INLINE_FUNCTION @@ -282,10 +328,9 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); - const int itype = type[i]; const LMP_FLOAT imass = rmass[i]; const LMP_FLOAT irad = radius[i]; - const int jnum = d_numneigh[i]; + const int jnum = d_numneigh_touch[i]; F_FLOAT fx_i = 0.0; F_FLOAT fy_i = 0.0; @@ -294,157 +339,148 @@ void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryC F_FLOAT torquex_i = 0.0; F_FLOAT torquey_i = 0.0; F_FLOAT torquez_i = 0.0; - - for (int jj = 0; jj < jnum; jj++) { - int j = d_neighbors(i,jj); - j &= NEIGHMASK; + for (int jj = 0; jj < jnum; jj++) { + const int m = d_neighbors_touch(i, jj); + const int j = d_neighbors(i, m) & NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const int jtype = type[j]; const LMP_FLOAT jmass = rmass[j]; const LMP_FLOAT jrad = radius[j]; const LMP_FLOAT radsum = irad + jrad; // check for touching neighbors - if (rsq >= radsum * radsum) { - d_firsttouch(i,jj) = 0; - d_firstshear(i,3*jj) = 0; - d_firstshear(i,3*jj+1) = 0; - d_firstshear(i,3*jj+2) = 0; - } else { - const LMP_FLOAT r = sqrt(rsq); - const LMP_FLOAT rinv = 1.0/r; - const LMP_FLOAT rsqinv = 1/rsq; + const LMP_FLOAT r = sqrt(rsq); + const LMP_FLOAT rinv = 1.0/r; + const LMP_FLOAT rsqinv = 1/rsq; - // relative translational velocity + // relative translational velocity - V_FLOAT vr1 = v(i,0) - v(j,0); - V_FLOAT vr2 = v(i,1) - v(j,1); - V_FLOAT vr3 = v(i,2) - v(j,2); + V_FLOAT vr1 = v(i,0) - v(j,0); + V_FLOAT vr2 = v(i,1) - v(j,1); + V_FLOAT vr3 = v(i,2) - v(j,2); - // normal component + // normal component - V_FLOAT vnnr = vr1*delx + vr2*dely + vr3*delz; - V_FLOAT vn1 = delx*vnnr * rsqinv; - V_FLOAT vn2 = dely*vnnr * rsqinv; - V_FLOAT vn3 = delz*vnnr * rsqinv; + V_FLOAT vnnr = vr1*delx + vr2*dely + vr3*delz; + V_FLOAT vn1 = delx*vnnr * rsqinv; + V_FLOAT vn2 = dely*vnnr * rsqinv; + V_FLOAT vn3 = delz*vnnr * rsqinv; - // tangential component + // tangential component - V_FLOAT vt1 = vr1 - vn1; - V_FLOAT vt2 = vr2 - vn2; - V_FLOAT vt3 = vr3 - vn3; + V_FLOAT vt1 = vr1 - vn1; + V_FLOAT vt2 = vr2 - vn2; + V_FLOAT vt3 = vr3 - vn3; - // relative rotational velocity + // relative rotational velocity - V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; - V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; - V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; + V_FLOAT wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; + V_FLOAT wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; + V_FLOAT wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; - LMP_FLOAT meff = imass*jmass / (imass+jmass); - if (mask[i] & freeze_group_bit) meff = jmass; - if (mask[j] & freeze_group_bit) meff = imass; + LMP_FLOAT meff = imass*jmass / (imass+jmass); + if (mask[i] & freeze_group_bit) meff = jmass; + if (mask[j] & freeze_group_bit) meff = imass; - F_FLOAT damp = meff*gamman*vnnr*rsqinv; - F_FLOAT ccel = kn*(radsum-r)*rinv - damp; + F_FLOAT damp = meff*gamman*vnnr*rsqinv; + F_FLOAT ccel = kn*(radsum-r)*rinv - damp; - // relative velocities + // relative velocities - V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3); - V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1); - V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2); - V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; - vrel = sqrt(vrel); + V_FLOAT vtr1 = vt1 - (delz*wr2-dely*wr3); + V_FLOAT vtr2 = vt2 - (delx*wr3-delz*wr1); + V_FLOAT vtr3 = vt3 - (dely*wr1-delx*wr2); + V_FLOAT vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); - // shear history effects + // shear history effects - d_firsttouch(i,jj) = 1; - X_FLOAT shear1 = d_firstshear(i,3*jj); - X_FLOAT shear2 = d_firstshear(i,3*jj+1); - X_FLOAT shear3 = d_firstshear(i,3*jj+2); - if (SHEARUPDATE) { - shear1 += vtr1*dt; - shear2 += vtr2*dt; - shear3 += vtr3*dt; - } - X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + - shear3*shear3); + X_FLOAT shear1 = d_firstshear(i,3*m); + X_FLOAT shear2 = d_firstshear(i,3*m+1); + X_FLOAT shear3 = d_firstshear(i,3*m+2); + if (SHEARUPDATE) { + shear1 += vtr1*dt; + shear2 += vtr2*dt; + shear3 += vtr3*dt; + } + X_FLOAT shrmag = sqrt(shear1*shear1 + shear2*shear2 + + shear3*shear3); - // rotate shear displacements + // rotate shear displacements - X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; - rsht *= rsqinv; - if (SHEARUPDATE) { - shear1 -= rsht*delx; - shear2 -= rsht*dely; - shear3 -= rsht*delz; - } + X_FLOAT rsht = shear1*delx + shear2*dely + shear3*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear1 -= rsht*delx; + shear2 -= rsht*dely; + shear3 -= rsht*delz; + } - // tangential forces = shear + tangential velocity damping - - F_FLOAT fs1 = - (kt*shear1 + meff*gammat*vtr1); - F_FLOAT fs2 = - (kt*shear2 + meff*gammat*vtr2); - F_FLOAT fs3 = - (kt*shear3 + meff*gammat*vtr3); - - // rescale frictional displacements and forces if needed - - F_FLOAT fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); - F_FLOAT fn = xmu * fabs(ccel*r); - - if (fs > fn) { - if (shrmag != 0.0) { - shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) - - meff*gammat*vtr1/kt; - shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) - - meff*gammat*vtr2/kt; - shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) - - meff*gammat*vtr3/kt; - fs1 *= fn/fs; - fs2 *= fn/fs; - fs3 *= fn/fs; - } else fs1 = fs2 = fs3 = 0.0; - } + // tangential forces = shear + tangential velocity damping + + F_FLOAT fs1 = - (kt*shear1 + meff*gammat*vtr1); + F_FLOAT fs2 = - (kt*shear2 + meff*gammat*vtr2); + F_FLOAT fs3 = - (kt*shear3 + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + F_FLOAT fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + F_FLOAT fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } - if (SHEARUPDATE) { - d_firstshear(i,3*jj) = shear1; - d_firstshear(i,3*jj+1) = shear2; - d_firstshear(i,3*jj+2) = shear3; - } + if (SHEARUPDATE) { + d_firstshear(i,3*m) = shear1; + d_firstshear(i,3*m+1) = shear2; + d_firstshear(i,3*m+2) = shear3; + } - // forces & torques - - F_FLOAT fx = delx*ccel + fs1; - F_FLOAT fy = dely*ccel + fs2; - F_FLOAT fz = delz*ccel + fs3; - fx_i += fx; - fy_i += fy; - fz_i += fz; - - F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); - F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); - F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1); - torquex_i -= irad*tor1; - torquey_i -= irad*tor2; - torquez_i -= irad*tor3; + // forces & torques + + F_FLOAT fx = delx*ccel + fs1; + F_FLOAT fy = dely*ccel + fs2; + F_FLOAT fz = delz*ccel + fs3; + fx_i += fx; + fy_i += fy; + fz_i += fz; + + F_FLOAT tor1 = rinv * (dely*fs3 - delz*fs2); + F_FLOAT tor2 = rinv * (delz*fs1 - delx*fs3); + F_FLOAT tor3 = rinv * (delx*fs2 - dely*fs1); + torquex_i -= irad*tor1; + torquey_i -= irad*tor2; + torquez_i -= irad*tor3; - if (NEWTON_PAIR || j < nlocal) { - a_f(j,0) -= fx; - a_f(j,1) -= fy; - a_f(j,2) -= fz; - a_torque(j,0) -= jrad*tor1; - a_torque(j,1) -= jrad*tor2; - a_torque(j,2) -= jrad*tor3; - } - - if (EVFLAG == 2) - ev_tally_xyz_atom<NEIGHFLAG, NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); - if (EVFLAG == 1) - ev_tally_xyz<NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); + if (NEWTON_PAIR || j < nlocal) { + a_f(j,0) -= fx; + a_f(j,1) -= fy; + a_f(j,2) -= fz; + a_torque(j,0) -= jrad*tor1; + a_torque(j,1) -= jrad*tor2; + a_torque(j,2) -= jrad*tor3; } + + if (EVFLAG == 2) + ev_tally_xyz_atom<NEIGHFLAG, NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); + if (EVFLAG == 1) + ev_tally_xyz<NEWTON_PAIR>(ev, i, j, fx_i, fy_i, fz_i, delx, dely, delz); } a_f(i,0) += fx_i; diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h index 65a92f23cdca2405895dc477aacceed9a0e803e9..822b9203a46ee5945c01eec2e353ca67be42385b 100644 --- a/src/KOKKOS/pair_gran_hooke_history_kokkos.h +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -32,7 +32,9 @@ template <class DeviceType> class FixNeighHistoryKokkos; template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> -struct TagPairGranHookeHistoryCompute{}; +struct TagPairGranHookeHistoryCompute {}; + +struct TagPairGranHookeHistoryReduce {}; template <class DeviceType> class PairGranHookeHistoryKokkos : public PairGranHookeHistory { @@ -46,6 +48,9 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { virtual void compute(int, int); void init_style(); + KOKKOS_INLINE_FUNCTION + void operator()(TagPairGranHookeHistoryReduce, const int ii) const; + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> KOKKOS_INLINE_FUNCTION void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int, EV_FLOAT &ev) const; @@ -88,6 +93,9 @@ class PairGranHookeHistoryKokkos : public PairGranHookeHistory { typename Kokkos::View<int**> d_firsttouch; typename Kokkos::View<LMP_FLOAT**> d_firstshear; + + typename AT::t_neighbors_2d d_neighbors_touch; + typename AT::t_int_1d d_numneigh_touch; int newton_pair; double special_lj[4];