diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp index d70c7391e2ba896961640895ce98e07acc6a0cd5..df6db8ddfaef10e944d29e8b9bce3b5313447031 100644 --- a/src/GRANULAR/pair_gran_hooke_history.cpp +++ b/src/GRANULAR/pair_gran_hooke_history.cpp @@ -63,6 +63,8 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp) PairGranHookeHistory::~PairGranHookeHistory() { + if (copymode) return; + delete [] svector; if (fix_history) modify->delete_fix("NEIGH_HISTORY"); diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index c6fab2a1b10f5d9f45c6bac72de82dd1ab5d8bf1..73fa21cb41f81769cb2b7d07bc8bc5f236d2ed29 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -71,6 +71,8 @@ action atom_vec_kokkos.cpp action atom_vec_kokkos.h action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp action atom_vec_molecular_kokkos.h atom_vec_molecular.h +action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp +action atom_vec_sphere_kokkos.h atom_vec_sphere.h action bond_class2_kokkos.cpp bond_class2.cpp action bond_class2_kokkos.h bond_class2.h action bond_fene_kokkos.cpp bond_fene.cpp @@ -97,6 +99,8 @@ action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h action fix_langevin_kokkos.cpp action fix_langevin_kokkos.h +action fix_neigh_history_kokkos.cpp +action fix_neigh_history_kokkos.h action fix_nh_kokkos.cpp action fix_nh_kokkos.h action fix_nph_kokkos.cpp @@ -191,6 +195,8 @@ action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp action pair_eam_fs_kokkos.h pair_eam_fs.h action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp action pair_exp6_rx_kokkos.h pair_exp6_rx.h +action pair_gran_hooke_history_kokkos.h +action pair_gran_hooke_history_kokkos.cpp action pair_hybrid_kokkos.cpp action pair_hybrid_kokkos.h action pair_hybrid_overlay_kokkos.cpp diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2788526d7c8d71af51fb9df660301823f2bbc60 --- /dev/null +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -0,0 +1,2155 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include <cmath> +#include <cstdlib> +#include <cstring> +#include "atom_vec_sphere_kokkos.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "force.h" +#include "fix.h" +#include "fix_adapt.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "memory_kokkos.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +static const double MY_PI = 3.14159265358979323846; // pi + +/* ---------------------------------------------------------------------- */ + +AtomVecSphereKokkos::AtomVecSphereKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + + comm_x_only = 1; + comm_f_only = 0; + size_forward = 3; + size_reverse = 6; + size_border = 8; + size_velocity = 6; + size_data_atom = 7; + size_data_vel = 7; + xcol_data = 5; + + atom->sphere_flag = 1; + atom->radius_flag = atom->rmass_flag = atom->omega_flag = + atom->torque_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::init() +{ + AtomVec::init(); + + // set radvary if particle diameters are time-varying due to fix adapt + + radvary = 0; + comm_x_only = 1; + size_forward = 3; + + for (int i = 0; i < modify->nfix; i++) { + if (strcmp(modify->fix[i]->style,"adapt") == 0) { + FixAdapt *fix = (FixAdapt *) modify->fix[i]; + if (fix->diamflag) { + radvary = 1; + comm_x_only = 0; + size_forward = 5; + } + } + } +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by a chunk + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atom->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_radius,atomKK->radius,nmax,"atom:radius"); + memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass"); + memoryKK->grow_kokkos(atomKK->k_omega,atomKK->omega,nmax,3,"atom:omega"); + memoryKK->grow_kokkos(atomKK->k_torque,atomKK->torque,nmax,3,"atom:torque"); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); + + grow_reset(); + sync(Host,ALL_MASK); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + radius = atomKK->radius; + d_radius = atomKK->k_radius.d_view; + h_radius = atomKK->k_radius.h_view; + rmass = atomKK->rmass; + d_rmass = atomKK->k_rmass.d_view; + h_rmass = atomKK->k_rmass.h_view; + omega = atomKK->omega; + d_omega = atomKK->k_omega.d_view; + h_omega = atomKK->k_omega.h_view; + torque = atomKK->torque; + d_torque = atomKK->k_torque.d_view; + h_torque = atomKK->k_torque.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::copy(int i, int j, int delflag) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | + RMASS_MASK | OMEGA_MASK); + + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + h_mask[j] = h_mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + + h_radius[j] = h_radius[i]; + h_rmass[j] = h_rmass[i]; + h_omega(j,0) = h_omega(i,0); + h_omega(j,1) = h_omega(i,1); + h_omega(j,2) = h_omega(i,2); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); + + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | + RMASS_MASK | OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecSphereKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecSphereKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3; + const size_t elements = 5; + buffer_view<DeviceType>(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _buf(i,3) = _radius(j); + _buf(i,4) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_kokkos( + const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) + return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc); + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPHostType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPHostType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG,int TRICLINIC> +struct AtomVecSphereKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + typename ArrayTypes<DeviceType>::t_x_array _xw; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + int _nfirst; + typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecSphereKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _radius(i+_nfirst) = _radius(j); + _rmass(i+_nfirst) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_self( + const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) + return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc); + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,0> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + int _first; + + AtomVecSphereKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_float_1d &radius, + const typename DAT::tdual_float_1d &rmass, + const typename DAT::tdual_xfloat_2d &buf, + const int& first): + _x(x.view<DeviceType>()), + _radius(radius.view<DeviceType>()), + _rmass(rmass.view<DeviceType>()), + _buf(buf.view<DeviceType>()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _radius(i+_first) = _buf(i,3); + _rmass(i+_first) = _buf(i,4); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm_kokkos( + const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + // Fallback to AtomVecKokkos if radvary == 0 + if (radvary == 0) { + AtomVecKokkos::unpack_comm_kokkos(n,first,buf); + return; + } + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,first); + Kokkos::parallel_for(n,f); + } else { + sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f( + atomKK->k_x, + atomKK->k_radius,atomKK->k_rmass, + buf,first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + if (radvary == 0) { + // Not sure if we need to call sync for X here + sync(Host,X_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + } else { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + if (radvary == 0) { + sync(Host,X_MASK|V_MASK|OMEGA_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + + } else { + sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf) +{ + if (radvary == 0) return 0; + + sync(Host,RADIUS_MASK|RMASS_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) +{ + if (radvary == 0) { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } + modified(Host,X_MASK); + } else { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + if (radvary == 0) { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + modified(Host,X_MASK|V_MASK|OMEGA_MASK); + } else { + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) +{ + if (radvary == 0) return 0; + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,RADIUS_MASK|RMASS_MASK); + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK|TORQUE_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + buf[m++] = h_torque(i,0); + buf[m++] = h_torque(i,1); + buf[m++] = h_torque(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,TORQUE_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_torque(i,0); + buf[m++] = h_torque(i,1); + buf[m++] = h_torque(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) { + sync(Host,F_MASK|TORQUE_MASK); + modified(Host,F_MASK|TORQUE_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + h_torque(j,0) += buf[m++]; + h_torque(j,1) += buf[m++]; + h_torque(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf) +{ + if(n > 0) { + sync(Host,TORQUE_MASK); + modified(Host,TORQUE_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_torque(j,0) += buf[m++]; + h_torque(j,1) += buf[m++]; + h_torque(j,2) += buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType,int PBC_FLAG> +struct AtomVecSphereKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes<DeviceType>::t_xfloat_2d _buf; + const typename ArrayTypes<DeviceType>::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes<DeviceType>::t_x_array_randomread _x; + const typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + const typename ArrayTypes<DeviceType>::t_int_1d _type; + const typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + X_FLOAT _dx,_dy,_dz; + + AtomVecSphereKokkos_PackBorder(const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf, + const typename ArrayTypes<DeviceType>::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes<DeviceType>::t_x_array &x, + const typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + const typename ArrayTypes<DeviceType>::t_int_1d &type, + const typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + } + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _radius(j); + _buf(i,7) = _rmass(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec + sync(space,ALL_MASK); + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecSphereKokkos_PackBorder<LMPHostType,1> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorder<LMPDeviceType,1> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } else { + dx = dy = dz = 0; + if(space==Host) { + AtomVecSphereKokkos_PackBorder<LMPHostType,0> f( + buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(), + iswap,h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } else { + AtomVecSphereKokkos_PackBorder<LMPDeviceType,0> f( + buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(), + iswap,d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + dx,dy,dz); + Kokkos::parallel_for(n,f); + } + } + return n*6; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + sync(Host,ALL_MASK); + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + sync(Host,ALL_MASK); + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + sync(Host,RADIUS_MASK|RMASS_MASK); + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + buf[m++] = h_radius[j]; + buf[m++] = h_rmass[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackBorder { + typedef DeviceType device_type; + + const typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf; + typename ArrayTypes<DeviceType>::t_x_array _x; + typename ArrayTypes<DeviceType>::t_tagint_1d _tag; + typename ArrayTypes<DeviceType>::t_int_1d _type; + typename ArrayTypes<DeviceType>::t_int_1d _mask; + typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass; + int _first; + + AtomVecSphereKokkos_UnpackBorder( + const typename ArrayTypes<DeviceType>::t_xfloat_2d_const &buf, + typename ArrayTypes<DeviceType>::t_x_array &x, + typename ArrayTypes<DeviceType>::t_tagint_1d &tag, + typename ArrayTypes<DeviceType>::t_int_1d &type, + typename ArrayTypes<DeviceType>::t_int_1d &mask, + const typename ArrayTypes<DeviceType>::t_float_1d &radius, + const typename ArrayTypes<DeviceType>::t_float_1d &rmass, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), + _radius(radius), + _rmass(rmass), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast<int> (_buf(i,3)); + _type(i+_first) = static_cast<int> (_buf(i,4)); + _mask(i+_first) = static_cast<int> (_buf(i,5)); + _radius(i+_first) = _buf(i,6); + _rmass(i+_first) = _buf(i,7); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + RADIUS_MASK|RMASS_MASK); + while (first+n >= nmax) grow(0); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + RADIUS_MASK|RMASS_MASK); + if(space==Host) { + struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(), + h_x,h_tag,h_type,h_mask, + h_radius,h_rmass, + first); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecSphereKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(), + d_x,d_tag,d_type,d_mask, + d_radius,d_rmass, + first); + Kokkos::parallel_for(n,f); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); +} + + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + h_radius[i] = buf[m++]; + h_rmass[i] = buf[m++]; + } + modified(Host,RADIUS_MASK|RMASS_MASK); + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_float_1d_randomread _radius,_rmass; + typename AT::t_v_array_randomread _omega; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_float_1d _radiusw,_rmassw; + typename AT::t_v_array _omegaw; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecSphereKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim,X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _radius(atom->k_radius.view<DeviceType>()), + _rmass(atom->k_rmass.view<DeviceType>()), + _omega(atom->k_omega.view<DeviceType>()), + _xw(atom->k_x.view<DeviceType>()), + _vw(atom->k_v.view<DeviceType>()), + _tagw(atom->k_tag.view<DeviceType>()), + _typew(atom->k_type.view<DeviceType>()), + _maskw(atom->k_mask.view<DeviceType>()), + _imagew(atom->k_image.view<DeviceType>()), + _radiusw(atom->k_radius.view<DeviceType>()), + _rmassw(atom->k_rmass.view<DeviceType>()), + _omegaw(atom->k_omega.view<DeviceType>()), + _sendlist(sendlist.template view<DeviceType>()), + _copylist(copylist.template view<DeviceType>()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 16; + const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 16; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = _tag[i]; + _buf(mysend,8) = _type[i]; + _buf(mysend,9) = _mask[i]; + _buf(mysend,10) = _image[i]; + _buf(mysend,11) = _radius[i]; + _buf(mysend,12) = _rmass[i]; + _buf(mysend,13) = _omega(i,0); + _buf(mysend,14) = _omega(i,1); + _buf(mysend,15) = _omega(i,2); + const int j = _copylist(mysend); + + if (j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw[i] = _tag(j); + _typew[i] = _type(j); + _maskw[i] = _mask(j); + _imagew[i] = _image(j); + _radiusw[i] = _radius(j); + _rmassw[i] = _rmass(j); + _omegaw(i,0) = _omega(j,0); + _omegaw(i,1) = _omega(j,1); + _omegaw(i,2) = _omega(j,2); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +{ + if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) { + int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1; + k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1)); + } + sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + if(space == Host) { + AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + } else { + AtomVecSphereKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + } + return nsend*16; +} + +/* ---------------------------------------------------------------------- + pack data for atom I for sending to another proc + xyz must be 1st 3 values, so comm::exchange() can test on them +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_exchange(int i, double *buf) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag[i]).d; + buf[m++] = ubuf(h_type[i]).d; + buf[m++] = ubuf(h_mask[i]).d; + buf[m++] = ubuf(h_image[i]).d; + + buf[m++] = h_radius[i]; + buf[m++] = h_rmass[i]; + buf[m++] = h_omega(i,0); + buf[m++] = h_omega(i,1); + buf[m++] = h_omega(i,2); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +struct AtomVecSphereKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_float_1d _radius; + typename AT::t_float_1d _rmass; + typename AT::t_v_array _omega; + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecSphereKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view<DeviceType>()), + _v(atom->k_v.view<DeviceType>()), + _tag(atom->k_tag.view<DeviceType>()), + _type(atom->k_type.view<DeviceType>()), + _mask(atom->k_mask.view<DeviceType>()), + _image(atom->k_image.view<DeviceType>()), + _nlocal(nlocal.template view<DeviceType>()),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 16; + const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements; + + buffer_view<DeviceType>(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = _buf(myrecv,7); + _type[i] = _buf(myrecv,8); + _mask[i] = _buf(myrecv,9); + _image[i] = _buf(myrecv,10); + _radius[i] = _buf(myrecv,11); + _rmass[i] = _buf(myrecv,12); + _omega(i,0) = _buf(myrecv,13); + _omega(i,1) = _buf(myrecv,14); + _omega(i,2) = _buf(myrecv,15); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + } else { + k_count.h_view(0) = nlocal; + k_count.modify<LMPHostType>(); + k_count.sync<LMPDeviceType>(); + AtomVecSphereKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/16,f); + k_count.modify<LMPDeviceType>(); + k_count.sync<LMPHostType>(); + } + + modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + return k_count.h_view(0); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag[nlocal] = (tagint) ubuf(buf[m++]).i; + h_type[nlocal] = (int) ubuf(buf[m++]).i; + h_mask[nlocal] = (int) ubuf(buf[m++]).i; + h_image[nlocal] = (imageint) ubuf(buf[m++]).i; + + h_radius[nlocal] = buf[m++]; + h_rmass[nlocal] = buf[m++]; + h_omega(nlocal,0) = buf[m++]; + h_omega(nlocal,1) = buf[m++]; + h_omega(nlocal,2) = buf[m++]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | + OMEGA_MASK); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 16 * nlocal; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_restart(int i, double *buf) +{ + sync(Host,X_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | V_MASK | + RADIUS_MASK | RMASS_MASK | OMEGA_MASK); + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag[i]).d; + buf[m++] = ubuf(h_type[i]).d; + buf[m++] = ubuf(h_mask[i]).d; + buf[m++] = ubuf(h_image[i]).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + + buf[m++] = h_radius[i]; + buf[m++] = h_rmass[i]; + buf[m++] = h_omega(i,0); + buf[m++] = h_omega(i,1); + buf[m++] = h_omega(i,2); + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag[nlocal] = (tagint) ubuf(buf[m++]).i; + h_type[nlocal] = (int) ubuf(buf[m++]).i; + h_mask[nlocal] = (int) ubuf(buf[m++]).i; + h_image[nlocal] = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + + h_radius[nlocal] = buf[m++]; + h_rmass[nlocal] = buf[m++]; + h_omega(nlocal,0) = buf[m++]; + h_omega(nlocal,1) = buf[m++]; + h_omega(nlocal,2) = buf[m++]; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast<int> (buf[0]) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + modified(Host,X_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | V_MASK | + RADIUS_MASK | RMASS_MASK | OMEGA_MASK); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + atomKK->modified(Host,ALL_MASK); + grow(0); + } + atomKK->modified(Host,ALL_MASK); + + h_tag[nlocal] = 0; + h_type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask[nlocal] = 1; + h_image[nlocal] = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_radius[nlocal] = 0.5; + h_rmass[nlocal] = 4.0*MY_PI/3.0 * h_radius[nlocal]*h_radius[nlocal]*h_radius[nlocal]; + h_omega(nlocal,0) = 0.0; + h_omega(nlocal,1) = 0.0; + h_omega(nlocal,2) = 0.0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::data_atom(double *coord, imageint imagetmp, char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + tag[nlocal] = ATOTAGINT(values[0]); + type[nlocal] = atoi(values[1]); + if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + radius[nlocal] = 0.5 * atof(values[2]); + if (radius[nlocal] < 0.0) + error->one(FLERR,"Invalid radius in Atoms section of data file"); + + double density = atof(values[3]); + if (density <= 0.0) + error->one(FLERR,"Invalid density in Atoms section of data file"); + + if (radius[nlocal] == 0.0) rmass[nlocal] = density; + else + rmass[nlocal] = 4.0*MY_PI/3.0 * + radius[nlocal]*radius[nlocal]*radius[nlocal] * density; + + x[nlocal][0] = coord[0]; + x[nlocal][1] = coord[1]; + x[nlocal][2] = coord[2]; + + image[nlocal] = imagetmp; + + mask[nlocal] = 1; + v[nlocal][0] = 0.0; + v[nlocal][1] = 0.0; + v[nlocal][2] = 0.0; + omega[nlocal][0] = 0.0; + omega[nlocal][1] = 0.0; + omega[nlocal][2] = 0.0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values) +{ + radius[nlocal] = 0.5 * atof(values[0]); + if (radius[nlocal] < 0.0) + error->one(FLERR,"Invalid radius in Atoms section of data file"); + + double density = atof(values[1]); + if (density <= 0.0) + error->one(FLERR,"Invalid density in Atoms section of data file"); + + if (radius[nlocal] == 0.0) rmass[nlocal] = density; + else + rmass[nlocal] = 4.0*MY_PI/3.0 * + radius[nlocal]*radius[nlocal]*radius[nlocal] * density; + + return 2; +} + +/* ---------------------------------------------------------------------- + unpack one line from Velocities section of data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::data_vel(int m, char **values) +{ + sync(Host,V_MASK|OMEGA_MASK); + h_v(m,0) = atof(values[0]); + h_v(m,1) = atof(values[1]); + h_v(m,2) = atof(values[2]); + h_omega(m,0) = atof(values[3]); + h_omega(m,1) = atof(values[4]); + h_omega(m,2) = atof(values[5]); + modified(Host,V_MASK|OMEGA_MASK); +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Velocities section of data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values) +{ + sync(Host,OMEGA_MASK); + omega[m][0] = atof(values[0]); + omega[m][1] = atof(values[1]); + omega[m][2] = atof(values[2]); + modified(Host,OMEGA_MASK); + return 3; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::pack_data(double **buf) +{ + atomKK->sync(Host,TAG_MASK|TYPE_MASK|RADIUS_MASK|RMASS_MASK|X_MASK|IMAGE_MASK); + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = ubuf(h_type[i]).d; + buf[i][2] = 2.0*h_radius[i]; + if (h_radius[i] == 0.0) buf[i][3] = h_rmass[i]; + else + buf[i][3] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]); + buf[i][4] = h_x(i,0); + buf[i][5] = h_x(i,1); + buf[i][6] = h_x(i,2); + buf[i][7] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d; + buf[i][8] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d; + buf[i][9] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_data_hybrid(int i, double *buf) +{ + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); + + buf[0] = 2.0*h_radius[i]; + if (h_radius[i] == 0.0) buf[1] = h_rmass[i]; + else buf[1] = h_rmass[i] / (4.0*MY_PI/3.0 * h_radius[i]*h_radius[i]*h_radius[i]); + return 2; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT + " %d %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i, + buf[i][2],buf[i][3], + buf[i][4],buf[i][5],buf[i][6], + (int) ubuf(buf[i][7]).i,(int) ubuf(buf[i][8]).i, + (int) ubuf(buf[i][9]).i); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e %-1.16e",buf[0],buf[1]); + return 2; +} + +/* ---------------------------------------------------------------------- + pack velocity info for data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::pack_vel(double **buf) +{ + sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = h_v(i,0); + buf[i][2] = h_v(i,1); + buf[i][3] = h_v(i,2); + buf[i][4] = h_omega(i,0); + buf[i][5] = h_omega(i,1); + buf[i][6] = h_omega(i,2); + } +} + +/* ---------------------------------------------------------------------- + pack hybrid velocity info for data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf) +{ + sync(Host,OMEGA_MASK); + + buf[0] = h_omega(i,0); + buf[1] = h_omega(i,1); + buf[2] = h_omega(i,2); + return 3; +} + +/* ---------------------------------------------------------------------- + write velocity info to data file +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::write_vel(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT + " %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e %-1.16e\n", + (tagint) ubuf(buf[i][0]).i,buf[i][1],buf[i][2],buf[i][3], + buf[i][4],buf[i][5],buf[i][6]); +} + +/* ---------------------------------------------------------------------- + write hybrid velocity info to data file +------------------------------------------------------------------------- */ + +int AtomVecSphereKokkos::write_vel_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e %-1.16e %-1.16e",buf[0],buf[1],buf[2]); + return 3; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecSphereKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*comm->nthreads,3); + + if (atom->memcheck("radius")) bytes += memory->usage(radius,nmax); + if (atom->memcheck("rmass")) bytes += memory->usage(rmass,nmax); + if (atom->memcheck("omega")) bytes += memory->usage(omega,nmax,3); + if (atom->memcheck("torque")) + bytes += memory->usage(torque,nmax*comm->nthreads,3); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.sync<LMPDeviceType>(); + if (mask & V_MASK) atomKK->k_v.sync<LMPDeviceType>(); + if (mask & F_MASK) atomKK->k_f.sync<LMPDeviceType>(); + if (mask & TAG_MASK) atomKK->k_tag.sync<LMPDeviceType>(); + if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>(); + if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>(); + if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPDeviceType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPDeviceType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPDeviceType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPDeviceType>(); + } else { + if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>(); + if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>(); + if (mask & F_MASK) atomKK->k_f.sync<LMPHostType>(); + if (mask & TAG_MASK) atomKK->k_tag.sync<LMPHostType>(); + if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>(); + if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>(); + if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync<LMPHostType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync<LMPHostType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync<LMPHostType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync<LMPHostType>(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space); + if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space); + if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPDeviceType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space); + if ((mask & RADIUS_MASK) && atomKK->k_radius.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_radius,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space); + if ((mask & OMEGA_MASK) && atomKK->k_omega.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_v_array>(atomKK->k_omega,space); + if ((mask & TORQUE_MASK) && atomKK->k_torque.need_sync<LMPHostType>()) + perform_async_copy<DAT::tdual_f_array>(atomKK->k_torque,space); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify<LMPDeviceType>(); + if (mask & V_MASK) atomKK->k_v.modify<LMPDeviceType>(); + if (mask & F_MASK) atomKK->k_f.modify<LMPDeviceType>(); + if (mask & TAG_MASK) atomKK->k_tag.modify<LMPDeviceType>(); + if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>(); + if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>(); + if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPDeviceType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPDeviceType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPDeviceType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPDeviceType>(); + } else { + if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>(); + if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>(); + if (mask & F_MASK) atomKK->k_f.modify<LMPHostType>(); + if (mask & TAG_MASK) atomKK->k_tag.modify<LMPHostType>(); + if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>(); + if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>(); + if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>(); + if (mask & RADIUS_MASK) atomKK->k_radius.modify<LMPHostType>(); + if (mask & RMASS_MASK) atomKK->k_rmass.modify<LMPHostType>(); + if (mask & OMEGA_MASK) atomKK->k_omega.modify<LMPHostType>(); + if (mask & TORQUE_MASK) atomKK->k_torque.modify<LMPHostType>(); + } +} diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..d6739cb7143f2028276859078d8fd5f1d65d974d --- /dev/null +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -0,0 +1,155 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(sphere/kk,AtomVecSphereKokkos) + +#else + +#ifndef LMP_ATOM_VEC_SPHERE_KOKKOS_H +#define LMP_ATOM_VEC_SPHERE_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecSphereKokkos : public AtomVecKokkos { + public: + AtomVecSphereKokkos(class LAMMPS *); + ~AtomVecSphereKokkos() {} + void init(); + void grow(int); + void grow_reset(); + void copy(int, int, int); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + int pack_comm_hybrid(int, int *, double *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int unpack_comm_hybrid(int, int, double *); + int pack_reverse(int, int, double *); + int pack_reverse_hybrid(int, int, double *); + void unpack_reverse(int, int *, double *); + int unpack_reverse_hybrid(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, imageint, char **); + int data_atom_hybrid(int, char **); + void data_vel(int, char **); + int data_vel_hybrid(int, char **); + void pack_data(double **); + int pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + void pack_vel(double **); + int pack_vel_hybrid(int, double *); + void write_vel(FILE *, int, double **); + int write_vel_hybrid(FILE *, double *); + bigint memory_usage(); + + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int &pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + + private: + tagint *tag; + int *type,*mask; + imageint *image; + double **x,**v,**f; + double *radius,*rmass; + double **omega,**torque; + int radvary; + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + DAT::t_float_1d d_radius; + HAT::t_float_1d h_radius; + DAT::t_float_1d d_rmass; + HAT::t_float_1d h_rmass; + DAT::t_v_array d_omega; + HAT::t_v_array h_omega; + DAT::t_f_array d_torque; + HAT::t_f_array h_torque; + + DAT::tdual_int_1d k_count; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. + +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. + +E: Invalid radius in Atoms section of data file + +Radius must be >= 0.0. + +E: Invalid density in Atoms section of data file + +Density value cannot be <= 0.0. + +*/ diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d5277c0188e58593db7759e84476907b25427c69 --- /dev/null +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -0,0 +1,346 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_neigh_history_kokkos.h" +#include "atom_kokkos.h" +#include "error.h" +#include "memory_kokkos.h" +#include "neigh_list_kokkos.h" +#include "pair_kokkos.h" +#include "comm.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) : + FixNeighHistory(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + + memory->destroy(npartner); + memory->destroy(partner); + memory->destroy(valuepartner); + + maxpartner = 8; + grow_arrays(atom->nmax); + + d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(d_resize); +#else + h_resize = d_resize; +#endif + h_resize() = 1; +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_npartner, npartner); + memoryKK->destroy_kokkos(k_partner, partner); + memoryKK->destroy_kokkos(k_valuepartner, valuepartner); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::init() +{ + if (atomKK->tag_enable == 0) + error->all(FLERR,"Neighbor history requires atoms have IDs"); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::pre_exchange() +{ + copymode = 1; + + h_resize() = 1; + while (h_resize() > 0) { + FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this); + Kokkos::parallel_for(nlocal_neigh,zero); + + h_resize() = 0; + deep_copy(d_resize, h_resize); + + FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this); + Kokkos::parallel_for(nlocal_neigh,f); + + deep_copy(h_resize, d_resize); + if (h_resize() > 0) { + maxpartner += 8; + memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner"); + memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner"); + } + } + + copymode = 0; + + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const +{ + d_npartner[i] = 0; +} + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const +{ + const int i = d_ilist[ii]; + const int jnum = d_numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + if (d_firstflag(i,jj)) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + int m = Kokkos::atomic_fetch_add(&d_npartner[i],1); + if (m < maxpartner) { + d_partner(i,m) = tag[j]; + for (int k = 0; k < dnum; k++) + d_valuepartner(i,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + } else { + d_resize() = 1; + } + if (j < nlocal_neigh) { + m = Kokkos::atomic_fetch_add(&d_npartner[j],1); + if (m < maxpartner) { + d_partner(j,m) = tag[i]; + for (int k = 0; k < dnum; k++) + d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k); + } else { + d_resize() = 1; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor() +{ + post_neighbor(); +} + +/* ---------------------------------------------------------------------- */ + +template <class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::post_neighbor() +{ + tag = atomKK->k_tag.view<DeviceType>(); + + int inum = pair->list->inum; + NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + // store atom counts used for new neighbor list which was just built + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + nlocal_neigh = nlocal; + nall_neigh = nall; + + // realloc firstflag and firstvalue if needed + + if (maxatom < nlocal || k_list->maxneighs > d_firstflag.extent(1)) { + maxatom = nall; + d_firstflag = typename ArrayTypes<DeviceType>::t_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs); + d_firstvalue = typename ArrayTypes<DeviceType>::t_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum); + } + + copymode = 1; + + FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this); + Kokkos::parallel_for(inum,f); + + copymode = 0; + +#ifdef LMP_KOKKOS_DEBUG + typename ArrayTypes<LMPHostType>::t_neighbors_2d h_neighbors("neighbor_history:h_neighbor",k_list->d_neighbors.extent(0), k_list->d_neighbors.extent(1)); + deep_copy(h_neighbors, d_neighbors); + + typename ArrayTypes<LMPHostType>::t_int_1d h_numneigh("neighbor_history:h_numneigh",k_list->d_numneigh.extent(0)); + deep_copy(h_numneigh, d_numneigh); + + typename ArrayTypes<LMPHostType>::t_int_2d h_firstflag("neighbor_history:h_firstflag",maxatom,k_list->maxneighs); + deep_copy(h_firstflag, d_firstflag); + + typename ArrayTypes<LMPHostType>::t_float_2d h_firstvalue("neighbor_history:h_firstvalue",maxatom,k_list->maxneighs*dnum); + deep_copy(h_firstvalue, d_firstvalue); +#endif +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +KOKKOS_INLINE_FUNCTION +void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const +{ + const int i = d_ilist[ii]; + const int jnum = d_numneigh[i]; + const int np = d_npartner[i]; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + const int rflag = j >> SBBITS & 3; + j &= NEIGHMASK; + + int m; + if (rflag) { + int jtag = tag(j); + for (m = 0; m < np; m++) + if (d_partner(i, m) == jtag) break; + if (m < np) { + d_firstflag(i,jj) = 1; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k); + } + } else { + d_firstflag(i,jj) = 0; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = 0; + } + } + } else { + d_firstflag(i,jj) = 0; + for (int k = 0; k < dnum; k++) { + d_firstvalue(i, dnum*jj+k) = 0; + } + } + } +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +double FixNeighHistoryKokkos<DeviceType>::memory_usage() +{ + double bytes = d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int); + bytes += d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double); + bytes += 2*k_npartner.extent(0)*sizeof(int); + bytes += 2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int); + bytes += 2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double); + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate fictitious charge arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax) +{ + k_npartner.template sync<LMPHostType>(); // force reallocation on host + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner"); + memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner"); + memoryKK->grow_kokkos(k_valuepartner,valuepartner,nmax,dnum*maxpartner,"neighbor_history:valuepartner"); + + d_npartner = k_npartner.template view<DeviceType>(); + d_partner = k_partner.template view<DeviceType>(); + d_valuepartner = k_valuepartner.template view<DeviceType>(); + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- + copy values within fictitious charge arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int delflag) +{ + k_npartner.template sync<LMPHostType>(); + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + npartner[j] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) { + partner[j][m] = partner[i][m]; + valuepartner[j][m] = valuepartner[i][m]; + } + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange with another proc +------------------------------------------------------------------------- */ + +template<class DeviceType> +int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf) +{ + k_npartner.template sync<LMPHostType>(); + k_partner.template sync<LMPHostType>(); + k_valuepartner.template sync<LMPHostType>(); + + int n = 0; + buf[n++] = npartner[i]; + for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m]; + for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m]; + + return n; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange with another proc +------------------------------------------------------------------------- */ + +template<class DeviceType> +int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf) +{ + int n = 0; + npartner[nlocal] = static_cast<int>(buf[n++]); + for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]); + for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++]; + + k_npartner.template modify<LMPHostType>(); + k_partner.template modify<LMPHostType>(); + k_valuepartner.template modify<LMPHostType>(); + + return n; +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixNeighHistoryKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class FixNeighHistoryKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..68bd59722f53d91d3b0125a3b859487f1e5addd6 --- /dev/null +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -0,0 +1,107 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(NEIGH_HISTORY/KK,FixNeighHistoryKokkos<LMPDeviceType>) +FixStyle(NEIGH_HISTORY/KK/DEVICE,FixNeighHistoryKokkos<LMPDeviceType>) +FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>) + +#else + +#ifndef LMP_FIX_NEIGH_HISTORY_KOKKOS_H +#define LMP_FIX_NEIGH_HISTORY_KOKKOS_H + +#include "fix_neigh_history.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { +template <class DeviceType> +class FixNeighHistoryKokkos : public FixNeighHistory { + public: + FixNeighHistoryKokkos(class LAMMPS *, int, char **); + ~FixNeighHistoryKokkos(); + + void init(); + void pre_exchange(); + void setup_post_neighbor(); + virtual void post_neighbor(); + double memory_usage(); + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + + KOKKOS_INLINE_FUNCTION + void zero_partner_count_item(const int &i) const; + KOKKOS_INLINE_FUNCTION + void pre_exchange_item(const int &ii) const; + KOKKOS_INLINE_FUNCTION + void post_neighbor_item(const int &ii) const; + + typename ArrayTypes<DeviceType>::t_int_2d d_firstflag; + typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue; + + private: + typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner; + typename ArrayTypes<DeviceType>::tdual_int_2d k_partner; + typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner; + + // for neighbor list lookup + typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors; + typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist; + typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh; + + typename ArrayTypes<DeviceType>::t_tagint_1d tag; + typename ArrayTypes<DeviceType>::t_int_1d d_npartner; + typename ArrayTypes<DeviceType>::t_int_2d d_partner; + typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner; + + typename ArrayTypes<DeviceType>::t_int_scalar d_resize; + typename ArrayTypes<LMPHostType>::t_int_scalar h_resize; +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosZeroPartnerCountFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + c.zero_partner_count_item(i); + } +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosPreExchangeFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + c.pre_exchange_item(i); + } +}; + +template <class DeviceType> +struct FixNeighHistoryKokkosPostNeighborFunctor { + FixNeighHistoryKokkos<DeviceType> c; + FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {} + KOKKOS_INLINE_FUNCTION + void operator() (const int &i) const { + c.post_neighbor_item(i); + } +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_NEIGH_HISTORY_KOKKOS_H +#endif // FIX_CLASS diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4411627a784f01329cb35da5afbb9457597d45e1..c50d6305e7353b082ef16aa2ca2774bea6a0a414 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -24,8 +24,8 @@ namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { } @@ -33,8 +33,8 @@ NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(l copy needed info from Neighbor class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_neighbor_info() { NPair::copy_neighbor_info(); @@ -63,8 +63,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info() copy per-atom and per-bin vectors from NBin class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_bin_info() { NPair::copy_bin_info(); @@ -80,8 +80,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info() copy needed info from NStencil class to this build class ------------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info() +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_stencil_info() { NPair::copy_stencil_info(); @@ -110,8 +110,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info() /* ---------------------------------------------------------------------- */ -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> -void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> +void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_) { NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; @@ -131,6 +131,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_stencilxyz.view<DeviceType>(), nlocal, atomKK->k_x.view<DeviceType>(), + atomKK->k_radius.view<DeviceType>(), atomKK->k_type.view<DeviceType>(), atomKK->k_mask.view<DeviceType>(), atomKK->k_molecule.view<DeviceType>(), @@ -155,7 +156,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_ex_mol_intra.view<DeviceType>(), bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, - domain->xprd_half,domain->yprd_half,domain->zprd_half); + domain->xprd_half,domain->yprd_half,domain->zprd_half, + skin); k_cutneighsq.sync<DeviceType>(); k_ex1_type.sync<DeviceType>(); @@ -171,7 +173,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) k_bincount.sync<DeviceType>(); k_bins.sync<DeviceType>(); k_atom2bin.sync<DeviceType>(); - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; @@ -198,25 +200,49 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_) Kokkos::parallel_for(nall, f); } else { if (newton_pair) { - NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + if (SIZE) { + NPairKokkosBuildFunctorSize<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - if (ExecutionSpaceFromDevice<DeviceType>::space == Device) - Kokkos::parallel_for(config, f); - else - Kokkos::parallel_for(nall, f); + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif + } else { + NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } } else { - NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + if (SIZE) { + NPairKokkosBuildFunctorSize<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - if (ExecutionSpaceFromDevice<DeviceType>::space == Device) - Kokkos::parallel_for(config, f); - else - Kokkos::parallel_for(nall, f); + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif + } else { + NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + if (ExecutionSpaceFromDevice<DeviceType>::space == Device) + Kokkos::parallel_for(config, f); + else + Kokkos::parallel_for(nall, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } } } deep_copy(data.h_resize, data.resize); @@ -774,19 +800,307 @@ void NeighborKokkosExecute<DeviceType>:: neigh_list.d_ilist(i) = i; } +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> template<int HalfNeigh,int Newton,int Tri> +void NeighborKokkosExecute<DeviceType>:: + build_ItemSize(const int &i) const +{ + /* if necessary, goto next page and add pages */ + int n = 0; + int which = 0; + + // get subview of neighbors of i + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const X_FLOAT radi = radius(i); + const int itype = type(i); + + const int ibin = c_atom2bin(i); + + const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil + = d_stencil; + + const int mask_history = 3 << SBBITS; + + // loop over all bins in neighborhood (includes ibin) + if(HalfNeigh) + for(int m = 0; m < c_bincount(ibin); m++) { + const int j = c_bins(ibin,m); + const int jtype = type(j); + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists + if((j == i) || (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) && + ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + radius(j); + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + + for(int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + + // get subview of jbin + if(HalfNeigh && (ibin==jbin)) continue; + //const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL); + for(int m = 0; m < c_bincount(jbin); m++) { + + const int j = c_bins(jbin,m); + const int jtype = type(j); + + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + radius(j); + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + + neigh_list.d_numneigh(i) = n; + + if(n > neigh_list.maxneighs) { + resize() = 1; + + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } + + neigh_list.d_ilist(i) = i; +} + +/* ---------------------------------------------------------------------- */ + +#ifdef KOKKOS_HAVE_CUDA +template<class DeviceType> template<int HalfNeigh,int Newton,int Tri> +__device__ inline +void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const +{ + /* loop over atoms in i's bin, + */ + const int atoms_per_bin = c_bins.extent(1); + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); + const int MY_BIN = dev.team_rank()/atoms_per_bin; + + const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; + + if(ibin >=c_bincount.extent(0)) return; + X_FLOAT* other_x = sharedmem; + other_x = other_x + 6*atoms_per_bin*MY_BIN; + + int* other_id = (int*) &other_x[5 * atoms_per_bin]; + + int bincount_current = c_bincount[ibin]; + + for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { + const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); + const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; + /* if necessary, goto next page and add pages */ + + int n = 0; + + X_FLOAT xtmp; + X_FLOAT ytmp; + X_FLOAT ztmp; + X_FLOAT radi; + int itype; + const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0); + const int mask_history = 3 << SBBITS; + + if(i >= 0) { + xtmp = x(i, 0); + ytmp = x(i, 1); + ztmp = x(i, 2); + radi = radius(i); + itype = type(i); + other_x[MY_II] = xtmp; + other_x[MY_II + atoms_per_bin] = ytmp; + other_x[MY_II + 2 * atoms_per_bin] = ztmp; + other_x[MY_II + 3 * atoms_per_bin] = itype; + other_x[MY_II + 4 * atoms_per_bin] = radi; + } + other_id[MY_II] = i; + int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); + + if(test) return; + + if(i >= 0 && i < nlocal) { + #pragma unroll 4 + for(int m = 0; m < bincount_current; m++) { + int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists + if((j == i) || + (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && + ((j < i) || + ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + __syncthreads(); + + const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil + = d_stencil; + for(int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + + if(ibin == jbin) continue; + + bincount_current = c_bincount[jbin]; + int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; + + if(j >= 0) { + other_x[MY_II] = x(j, 0); + other_x[MY_II + atoms_per_bin] = x(j, 1); + other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); + other_x[MY_II + 3 * atoms_per_bin] = type(j); + other_x[MY_II + 4 * atoms_per_bin] = radius(j); + } + + other_id[MY_II] = j; + + __syncthreads(); + + if(i >= 0 && i < nlocal) { + #pragma unroll 8 + for(int m = 0; m < bincount_current; m++) { + const int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + if(HalfNeigh && (j < i)) continue; + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); + + if(rsq <= cutsq) { + if(n<neigh_list.maxneighs) { + if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history; + else neighbors_i(n++) = j; + } + else n++; + } + } + } + __syncthreads(); + } + + if(i >= 0 && i < nlocal) { + neigh_list.d_numneigh(i) = n; + neigh_list.d_ilist(i) = i; + } + + if(n > neigh_list.maxneighs) { + resize() = 1; + + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } + } +} +#endif + } namespace LAMMPS_NS { -template class NPairKokkos<LMPDeviceType,0,0,0>; -template class NPairKokkos<LMPDeviceType,0,1,0>; -template class NPairKokkos<LMPDeviceType,1,0,0>; -template class NPairKokkos<LMPDeviceType,1,1,0>; -template class NPairKokkos<LMPDeviceType,1,0,1>; +template class NPairKokkos<LMPDeviceType,0,0,0,0>; +template class NPairKokkos<LMPDeviceType,0,1,0,0>; +template class NPairKokkos<LMPDeviceType,1,0,0,0>; +template class NPairKokkos<LMPDeviceType,1,1,0,0>; +template class NPairKokkos<LMPDeviceType,1,0,1,0>; +template class NPairKokkos<LMPDeviceType,1,0,0,1>; +template class NPairKokkos<LMPDeviceType,1,0,1,1>; #ifdef KOKKOS_HAVE_CUDA -template class NPairKokkos<LMPHostType,0,0,0>; -template class NPairKokkos<LMPHostType,0,1,0>; -template class NPairKokkos<LMPHostType,1,0,0>; -template class NPairKokkos<LMPHostType,1,1,0>; -template class NPairKokkos<LMPHostType,1,0,1>; +template class NPairKokkos<LMPHostType,0,0,0,0>; +template class NPairKokkos<LMPHostType,0,1,0,0>; +template class NPairKokkos<LMPHostType,1,0,0,0>; +template class NPairKokkos<LMPHostType,1,1,0,0>; +template class NPairKokkos<LMPHostType,1,0,1,0>; +template class NPairKokkos<LMPHostType,1,0,0,1>; +template class NPairKokkos<LMPHostType,1,0,1,1>; #endif } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 6c1c0e958b49dcd09ff7bb6b32efca67b2d3f6a6..a8837fa9f35d7e4480fa3a42ebd1558e7a3d1a57 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -13,56 +13,76 @@ #ifdef NPAIR_CLASS -typedef NPairKokkos<LMPHostType,0,0,0> NPairKokkosFullBinHost; +typedef NPairKokkos<LMPHostType,0,0,0,0> NPairKokkosFullBinHost; NPairStyle(full/bin/kk/host, NPairKokkosFullBinHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,0,0,0> NPairKokkosFullBinDevice; +typedef NPairKokkos<LMPDeviceType,0,0,0,0> NPairKokkosFullBinDevice; NPairStyle(full/bin/kk/device, NPairKokkosFullBinDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPHostType,0,1,0> NPairKokkosFullBinGhostHost; +typedef NPairKokkos<LMPHostType,0,1,0,0> NPairKokkosFullBinGhostHost; NPairStyle(full/bin/ghost/kk/host, NPairKokkosFullBinGhostHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,0,1,0> NPairKokkosFullBinGhostDevice; +typedef NPairKokkos<LMPDeviceType,0,1,0,0> NPairKokkosFullBinGhostDevice; NPairStyle(full/bin/ghost/kk/device, NPairKokkosFullBinGhostDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPHostType,1,0,0> NPairKokkosHalfBinHost; +typedef NPairKokkos<LMPHostType,1,0,0,0> NPairKokkosHalfBinHost; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) -typedef NPairKokkos<LMPDeviceType,1,0,0> NPairKokkosHalfBinDevice; +typedef NPairKokkos<LMPDeviceType,1,0,0,0> NPairKokkosHalfBinDevice; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) -typedef NPairKokkos<LMPHostType,1,0,1> NPairKokkosHalfBinHostTri; +typedef NPairKokkos<LMPHostType,1,0,1,0> NPairKokkosHalfBinHostTri; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHostTri, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI) -typedef NPairKokkos<LMPDeviceType,1,0,1> NPairKokkosHalfBinDeviceTri; +typedef NPairKokkos<LMPDeviceType,1,0,1,0> NPairKokkosHalfBinDeviceTri; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDeviceTri, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI) -typedef NPairKokkos<LMPHostType,1,1,0> NPairKokkosHalfBinGhostHost; +typedef NPairKokkos<LMPHostType,1,1,0,0> NPairKokkosHalfBinGhostHost; NPairStyle(half/bin/ghost/kk/host, NPairKokkosHalfBinGhostHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) -typedef NPairKokkos<LMPDeviceType,1,1,0> NPairKokkosHalfBinGhostDevice; +typedef NPairKokkos<LMPDeviceType,1,1,0,0> NPairKokkosHalfBinGhostDevice; NPairStyle(half/bin/ghost/kk/device, NPairKokkosHalfBinGhostDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) +typedef NPairKokkos<LMPHostType,1,0,0,1> NPairKokkosHalfSizeBinHost; +NPairStyle(half/size/bin/kk/host, + NPairKokkosHalfSizeBinHost, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) + +typedef NPairKokkos<LMPDeviceType,1,0,0,1> NPairKokkosHalfSizeBinDevice; +NPairStyle(half/size/bin/kk/device, + NPairKokkosHalfSizeBinDevice, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) + +typedef NPairKokkos<LMPHostType,1,0,1,1> NPairKokkosHalfSizeBinHostTri; +NPairStyle(half/size/bin/kk/host, + NPairKokkosHalfSizeBinHostTri, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI) + +typedef NPairKokkos<LMPDeviceType,1,0,1,1> NPairKokkosHalfSizeBinDeviceTri; +NPairStyle(half/size/bin/kk/device, + NPairKokkosHalfSizeBinDeviceTri, + NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI) + #else #ifndef LMP_NPAIR_KOKKOS_H @@ -73,7 +93,7 @@ NPairStyle(half/bin/ghost/kk/device, namespace LAMMPS_NS { -template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI> +template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE> class NPairKokkos : public NPair { public: NPairKokkos(class LAMMPS *); @@ -162,6 +182,7 @@ class NeighborKokkosExecute // data from Atom class const typename AT::t_x_array_randomread x; + const typename AT::t_float_1d_randomread radius; const typename AT::t_int_1d_const type,mask; const typename AT::t_tagint_1d_const molecule; const typename AT::t_tagint_1d_const tag; @@ -188,6 +209,9 @@ class NeighborKokkosExecute const int xperiodic, yperiodic, zperiodic; const int xprd_half, yprd_half, zprd_half; + // GRANULAR required member variables + const X_FLOAT skin; + NeighborKokkosExecute( const NeighListKokkos<DeviceType> &_neigh_list, const typename AT::t_xfloat_2d_randomread &_cutneighsq, @@ -199,6 +223,7 @@ class NeighborKokkosExecute const typename AT::t_int_1d_3 &_d_stencilxyz, const int _nlocal, const typename AT::t_x_array_randomread &_x, + const typename AT::t_float_1d_randomread &_radius, const typename AT::t_int_1d_const &_type, const typename AT::t_int_1d_const &_mask, const typename AT::t_tagint_1d_const &_molecule, @@ -225,13 +250,14 @@ class NeighborKokkosExecute const typename AT::t_int_1d_const & _ex_mol_intra, const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, - const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): + const int & _xprd_half, const int & _yprd_half, const int & _zprd_half, + const X_FLOAT _skin): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), nlocal(_nlocal), - x(_x),type(_type),mask(_mask),molecule(_molecule), + x(_x),radius(_radius),type(_type),mask(_mask),molecule(_molecule), tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), @@ -245,7 +271,8 @@ class NeighborKokkosExecute ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), - xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { + xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half), + skin(_skin) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; @@ -280,10 +307,18 @@ class NeighborKokkosExecute KOKKOS_FUNCTION void build_Item_Ghost(const int &i) const; + template<int HalfNeigh, int Newton, int Tri> + KOKKOS_FUNCTION + void build_ItemSize(const int &i) const; + #ifdef KOKKOS_HAVE_CUDA template<int HalfNeigh, int Newton, int Tri> __device__ inline void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const; + + template<int HalfNeigh, int Newton, int Tri> + __device__ inline + void build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const; #endif KOKKOS_INLINE_FUNCTION @@ -399,6 +434,48 @@ struct NPairKokkosBuildFunctorGhost { } }; +template <class DeviceType, int HALF_NEIGH, int GHOST_NEWTON, int TRI> +struct NPairKokkosBuildFunctorSize { + typedef DeviceType device_type; + + const NeighborKokkosExecute<DeviceType> c; + const size_t sharedsize; + + NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<DeviceType> &_c, + const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i); + } + +#ifdef KOKKOS_HAVE_CUDA + __device__ inline + void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const { + c.template build_ItemSizeCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev); + } + size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; } +#endif +}; + +template <int HALF_NEIGH, int GHOST_NEWTON, int TRI> +struct NPairKokkosBuildFunctorSize<LMPHostType,HALF_NEIGH,GHOST_NEWTON,TRI> { + typedef LMPHostType device_type; + + const NeighborKokkosExecute<LMPHostType> c; + const size_t sharedsize; + + NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<LMPHostType> &_c, + const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i); + } + + void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {} // Should error out +}; + } #endif diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d7c8f8a99e6a2faacb896ad248a61dc3b148e14a --- /dev/null +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.cpp @@ -0,0 +1,440 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "pair_gran_hooke_history_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "memory_kokkos.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "error.h" +#include "modify.h" +#include "fix_neigh_history_kokkos.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairGranHookeHistoryKokkos<DeviceType>::PairGranHookeHistoryKokkos(LAMMPS *lmp) : PairGranHookeHistory(lmp) +{ + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | V_MASK | OMEGA_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | MASK_MASK | ENERGY_MASK | VIRIAL_MASK | RMASS_MASK | RADIUS_MASK; + datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairGranHookeHistoryKokkos<DeviceType>::~PairGranHookeHistoryKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + eatom = NULL; + vatom = NULL; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairGranHookeHistoryKokkos<DeviceType>::init_style() +{ + if (history && fix_history == NULL) { + char dnumstr[16]; + sprintf(dnumstr,"%d",3); + char **fixarg = new char*[4]; + fixarg[0] = (char *) "NEIGH_HISTORY"; + fixarg[1] = (char *) "all"; + if (execution_space == Device) + fixarg[2] = (char *) "NEIGH_HISTORY/KK/DEVICE"; + else + fixarg[2] = (char *) "NEIGH_HISTORY/KK/HOST"; + fixarg[3] = dnumstr; + modify->add_fix(4,fixarg,1); + delete [] fixarg; + fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1]; + fix_history->pair = this; + fix_historyKK = (FixNeighHistoryKokkos<DeviceType> *)fix_history; + } + + PairGranHookeHistory::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with gran/hooke/history/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + copymode = 1; + + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag,0); + else evflag = vflag_fdotr = 0; + + int shearupdate = 1; + if (update->setupflag) shearupdate = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view<DeviceType>(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view<DeviceType>(); + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK | TORQUE_MASK); + + x = atomKK->k_x.view<DeviceType>(); + v = atomKK->k_v.view<DeviceType>(); + omega = atomKK->k_omega.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + torque = atomKK->k_torque.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + mask = atomKK->k_mask.view<DeviceType>(); + tag = atomKK->k_tag.view<DeviceType>(); + rmass = atomKK->k_rmass.view<DeviceType>(); + radius = atomKK->k_radius.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + int inum = list->inum; + NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + d_firsttouch = fix_historyKK->d_firstflag; + d_firstshear = fix_historyKK->d_firstvalue; + + EV_FLOAT ev; + + if (lmp->kokkos->neighflag == HALF) { + if (force->newton_pair) { + if (evflag) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,0>>(0,inum),*this); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,0>>(0,inum),*this); + } + } + } else { + if (evflag) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,0>>(0,inum),*this); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,0>>(0,inum),*this); + } + } + } + } else { // HALFTHREAD + if (force->newton_pair) { + if (evflag) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,0>>(0,inum),*this); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,0>>(0,inum),*this); + } + } + } else { + if (evflag) { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,0>>(0,inum),*this); + } + } else { + if (shearupdate) { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,1>>(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,0>>(0,inum),*this); + } + } + } + } + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (eflag_atom) { + k_eatom.template modify<DeviceType>(); + k_eatom.template sync<LMPHostType>(); + } + + if (vflag_atom) { + k_vatom.template modify<DeviceType>(); + k_vatom.template sync<LMPHostType>(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + copymode = 0; +} + +template<class DeviceType> +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> +KOKKOS_INLINE_FUNCTION +void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int &ii) const { + + // The f and torque arrays are atomic for Half/Thread neighbor style + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f; + Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_torque = torque; + + int i = d_ilist[ii]; + double xtmp = x(i,0); + double ytmp = x(i,1); + double ztmp = x(i,2); + int itype = type[i]; + double imass = rmass[i]; + double irad = radius[i]; + int jnum = d_numneigh[i]; + + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + + double torquex_i = 0.0; + double torquey_i = 0.0; + double torquez_i = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + double delx = xtmp - x(j,0); + double dely = ytmp - x(j,1); + double delz = ztmp - x(j,2); + double rsq = delx*delx + dely*dely + delz*delz; + int jtype = type[j]; + double jmass = rmass[j]; + double jrad = radius[j]; + double radsum = irad + jrad; + + // check for touching neighbors + + if (rsq >= radsum * radsum) { + d_firsttouch(i,jj) = 0; + d_firstshear(i,3*jj) = 0; + d_firstshear(i,3*jj+1) = 0; + d_firstshear(i,3*jj+2) = 0; + } else { + double r = sqrt(rsq); + double rinv = 1.0/r; + double rsqinv = 1/rsq; + + // relative translational velocity + + double vr1 = v(i,0) - v(j,0); + double vr2 = v(i,1) - v(j,1); + double vr3 = v(i,2) - v(j,2); + + // normal component + + double vnnr = vr1*delx + vr2*dely + vr3*delz; + double vn1 = delx*vnnr * rsqinv; + double vn2 = dely*vnnr * rsqinv; + double vn3 = delz*vnnr * rsqinv; + + // tangential component + + double vt1 = vr1 - vn1; + double vt2 = vr2 - vn2; + double vt3 = vr3 - vn3; + + // relative rotational velocity + + double wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv; + double wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv; + double wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv; + + double meff = imass*jmass / (imass+jmass); + if (mask[i] & freeze_group_bit) meff = jmass; + if (mask[j] & freeze_group_bit) meff = imass; + + double damp = meff*gamman*vnnr*rsqinv; + double ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + double vtr1 = vt1 - (delz*wr2-dely*wr3); + double vtr2 = vt2 - (delx*wr3-delz*wr1); + double vtr3 = vt3 - (dely*wr1-delx*wr2); + double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + d_firsttouch(i,jj) = 1; + double shear1 = d_firstshear(i,3*jj); + double shear2 = d_firstshear(i,3*jj+1); + double shear3 = d_firstshear(i,3*jj+2); + if (SHEARUPDATE) { + shear1 += vtr1*dt; + shear2 += vtr2*dt; + shear3 += vtr3*dt; + } + double shrmag = sqrt(shear1*shear1 + shear2*shear2 + + shear3*shear3); + + // rotate shear displacements + + double rsht = shear1*delx + shear2*dely + shear3*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear1 -= rsht*delx; + shear2 -= rsht*dely; + shear3 -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + double fs1 = - (kt*shear1 + meff*gammat*vtr1); + double fs2 = - (kt*shear2 + meff*gammat*vtr2); + double fs3 = - (kt*shear3 + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + double fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) - + meff*gammat*vtr1/kt; + shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) - + meff*gammat*vtr2/kt; + shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) - + meff*gammat*vtr3/kt; + fs1 *= fn/fs; + fs2 *= fn/fs; + fs3 *= fn/fs; + } else fs1 = fs2 = fs3 = 0.0; + } + + if (SHEARUPDATE) { + d_firstshear(i,3*jj) = shear1; + d_firstshear(i,3*jj+1) = shear2; + d_firstshear(i,3*jj+2) = shear3; + } + + // forces & torques + + double fx = delx*ccel + fs1; + double fy = dely*ccel + fs2; + double fz = delz*ccel + fs3; + fx_i += fx; + fy_i += fy; + fz_i += fz; + + double tor1 = rinv * (dely*fs3 - delz*fs2); + double tor2 = rinv * (delz*fs1 - delx*fs3); + double tor3 = rinv * (delx*fs2 - dely*fs1); + torquex_i -= irad*tor1; + torquey_i -= irad*tor2; + torquez_i -= irad*tor3; + + if (NEWTON_PAIR || j < nlocal) { + a_f(j,0) -= fx; + a_f(j,1) -= fy; + a_f(j,2) -= fz; + a_torque(j,0) -= jrad*tor1; + a_torque(j,1) -= jrad*tor2; + a_torque(j,2) -= jrad*tor3; + } + } + } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_torque(i,0) += torquex_i; + a_torque(i,1) += torquey_i; + a_torque(i,2) += torquez_i; +} + +namespace LAMMPS_NS { +template class PairGranHookeHistoryKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairGranHookeHistoryKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/pair_gran_hooke_history_kokkos.h b/src/KOKKOS/pair_gran_hooke_history_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..84dc4120781681f33db4c465cdab4200677967e1 --- /dev/null +++ b/src/KOKKOS/pair_gran_hooke_history_kokkos.h @@ -0,0 +1,127 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/history/kk,PairGranHookeHistoryKokkos<LMPDeviceType>) +PairStyle(gran/hooke/history/kk/device,PairGranHookeHistoryKokkos<LMPDeviceType>) +PairStyle(gran/hooke/history/kk/host,PairGranHookeHistoryKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H +#define LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H + +#include "pair_gran_hooke_history.h" +#include "pair_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template <class DeviceType> +class FixNeighHistoryKokkos; + +template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> +struct TagPairGranHookeHistoryCompute{}; + +template <class DeviceType> +class PairGranHookeHistoryKokkos : public PairGranHookeHistory { + public: + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + PairGranHookeHistoryKokkos(class LAMMPS *); + virtual ~PairGranHookeHistoryKokkos(); + virtual void compute(int, int); + void init_style(); + + template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE> + KOKKOS_INLINE_FUNCTION + void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int&) const; + + protected: + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_v_array_randomread v; + typename AT::t_v_array_randomread omega; + typename AT::t_f_array f; + typename AT::t_f_array torque; + typename AT::t_int_1d_randomread type; + typename AT::t_int_1d_randomread mask; + typename AT::t_float_1d_randomread rmass; + typename AT::t_float_1d_randomread radius; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + typename AT::t_tagint_1d tag; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + typename AT::t_int_2d d_firsttouch; + typename AT::t_float_2d d_firstshear; + + int newton_pair; + double special_lj[4]; + + int neighflag; + int nlocal,nall,eflag,vflag; + + FixNeighHistoryKokkos<DeviceType> *fix_historyKK; + + friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair granular requires atom attributes radius, rmass + +The atom style defined does not have these attributes. + +E: Pair granular requires ghost atoms store velocity + +Use the comm_modify vel yes command to enable this. + +E: Could not find pair fix neigh history ID + +UNDOCUMENTED + +U: Pair granular with shear history requires newton pair off + +This is a current restriction of the implementation of pair +granular styles with history. + +U: Could not find pair fix ID + +A fix is created internally by the pair style to store shear +history information. You cannot delete it. + +*/ diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 136e543a2cd708501004d2f94290b21454dabe0a..417de2d94aec73baaa56f14dc7a100d9bed69fa7 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -55,6 +55,20 @@ struct ForceAdder { /* ---------------------------------------------------------------------- */ +template<class View> +struct Zero { + View v; + Zero(const View &v_):v(v_) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &i) const { + v(i,0) = 0; + v(i,1) = 0; + v(i,2) = 0; + } +}; + +/* ---------------------------------------------------------------------- */ + VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) : Verlet(lmp, narg, arg) { @@ -120,6 +134,7 @@ void VerletKokkos::setup(int flag) atomKK->modified(Host,ALL_MASK); neighbor->build(1); + modify->setup_post_neighbor(); neighbor->ncalls = 0; // compute all forces @@ -175,7 +190,7 @@ void VerletKokkos::setup(int flag) modify->setup(vflag); output->setup(flag); lmp->kokkos->auto_sync = 1; - update->setupflag = 1; + update->setupflag = 0; } /* ---------------------------------------------------------------------- @@ -223,6 +238,7 @@ void VerletKokkos::setup_minimal(int flag) atomKK->modified(Host,ALL_MASK); neighbor->build(1); + modify->setup_post_neighbor(); neighbor->ncalls = 0; } @@ -567,8 +583,6 @@ void VerletKokkos::run(int n) void VerletKokkos::force_clear() { - int i; - if (external_force_clear) return; // clear force on all particles @@ -576,23 +590,15 @@ void VerletKokkos::force_clear() // when using threads always clear all forces. if (neighbor->includegroup == 0) { - int nall; - if (force->newton) nall = atomKK->nlocal + atomKK->nghost; - else nall = atomKK->nlocal; + int nall = atomKK->nlocal; + if (force->newton) nall += atomKK->nghost; - size_t nbytes = sizeof(double) * nall; - - if (nbytes) { - if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) { - memset_kokkos(atomKK->k_f.view<LMPHostType>()); - atomKK->modified(Host,F_MASK); - atomKK->sync(Device,F_MASK); - } else { - memset_kokkos(atomKK->k_f.view<LMPDeviceType>()); - atomKK->modified(Device,F_MASK); - } - if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes); + Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); + if (torqueflag) { + Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } // neighbor includegroup flag is set @@ -600,35 +606,23 @@ void VerletKokkos::force_clear() // if either newton flag is set, also include ghosts } else { - int nall = atomKK->nfirst; - if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) { - memset_kokkos(atomKK->k_f.view<LMPHostType>()); - atomKK->modified(Host,F_MASK); - } else { - memset_kokkos(atomKK->k_f.view<LMPDeviceType>()); - atomKK->modified(Device,F_MASK); - } + Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); + if (torqueflag) { - double **torque = atomKK->torque; - for (i = 0; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } + Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } if (force->newton) { - nall = atomKK->nlocal + atomKK->nghost; + auto range = Kokkos::RangePolicy<LMPDeviceType>(atomKK->nlocal, atomKK->nlocal + atomKK->nghost); + Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>())); + atomKK->modified(Device,F_MASK); if (torqueflag) { - double **torque = atomKK->torque; - for (i = atomKK->nlocal; i < nall; i++) { - torque[i][0] = 0.0; - torque[i][1] = 0.0; - torque[i][2] = 0.0; - } + Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>())); + atomKK->modified(Device,TORQUE_MASK); } - } } } diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 3aa6d7748ce96b2c095857ed5e9f6c2acbc9314d..f4e733a5acdc58958f21e904c831367c264536d6 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -609,7 +609,7 @@ void CommBrick::exchange() maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; if (bufextra > bufextra_old) - memory->grow(buf_send,maxsend+bufextra,"comm:buf_send"); + grow_send(maxsend+bufextra,1); // subbox bounds for orthogonal or triclinic diff --git a/src/fix_neigh_history.cpp b/src/fix_neigh_history.cpp index 3329b604efa4a0e2f6310e34cce95d7ca6902a14..90b77821e68cb47e06c9cafb8fc4eeed97f6d56e 100644 --- a/src/fix_neigh_history.cpp +++ b/src/fix_neigh_history.cpp @@ -95,6 +95,8 @@ FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) : FixNeighHistory::~FixNeighHistory() { + if (copymode) return; + // unregister this fix so atom class doesn't invoke it any more atom->delete_callback(id,0);